R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. Html_fragmented is quicker to load than html, hence for this pipeline which genenerated a large mount of data, we have used html_fragment to report Rmarkdown.

Checklist before starting run of this pipeline

#1) The input data for this code was obtained from htseq-count data obtained from processing raw fastq files #2) Input expression files are SRR(number).txt htseq-count from RNA-seq analysis folder for GSE61915, GSE73503 and GSE83931 #3) metadata file made by manually combining individual metadata files form GEO to give ‘GSE61915_GSE73503_GSE83931_metadata_edit.csv’ #4) metadata coded file made manually from ‘merge_GSE61915_GSE73503_GSE83931_metadata_SampleID.csv’ outputed at end of merge code Step5 to give ‘merge_GSE61915_GSE73503_GSE83931_metadata_SampleID_coded’ code used is Study GSE61915=1, GSE73503=2, GSE83931=3; Gender Female=1, Male=2, MaleFemale=3 #5) run code till step 22 and then based on cell type and trait associated module replace in code “yellow” or “MsAge7K_04” with module name that is associated with trait or disease of interest. And replace hub gene names. #6) WGCNA assigns colors randomly (except few reserved colors like “grey” for unassigned genes). Therefore, upon re-run of this analysis it may call the age associated microglia enriched “yellow” module by some other color such as “brown”, in which case this color is specified step 23 onwards as described in point 5) above. #7) The present analysis was done on MacOS, using knitToHtmlfragment.

#8) To reuse this code for other datasets a) replace 1) and 2) input files above with equivalent files for the dataset b) replace “MsAge7K_” for module naming to pre-fix of users choice that is meaningful for the dataset. Here Ms=Mouse, Age=Age c) this pipeline uses Human gene symbols

merging htseq-count data for RNA-seq mouse GSE61915, GSE73503 and GSE83931

Step1: Load required libraries and setting woking directory

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/MsAge11-30-18"
#Install packages by uncommenting two lines below if packages not already installed before
#source("https://bioconductor.org/biocLite.R")
##biocLite(c("stringr", "reshape2",  "dplyr", "ggplot2",  "magrittr", "edgeR", "DT))

library(stringr)
library(reshape2)
library(dplyr)
library(ggplot2)
library(magrittr)
library(edgeR)
library(DT)

Step2: Merge the htseq-counts expression data

setwd("./InputHtseqCount")
#Be careful to check that the input pattern here is such that only the count data and no other files are selected
htseqcountfiles <- list.files(pattern = "*.txt")
htseqcountfiles
##  [1] "SRR1593496.txt" "SRR1593497.txt" "SRR1593498.txt" "SRR1593499.txt"
##  [5] "SRR1593500.txt" "SRR1593501.txt" "SRR1593502.txt" "SRR1593503.txt"
##  [9] "SRR1593504.txt" "SRR1593505.txt" "SRR1593506.txt" "SRR1593507.txt"
## [13] "SRR1593508.txt" "SRR1593509.txt" "SRR1593510.txt" "SRR1593511.txt"
## [17] "SRR1593512.txt" "SRR2531532.txt" "SRR2531533.txt" "SRR2531534.txt"
## [21] "SRR2531535.txt" "SRR2531536.txt" "SRR2531537.txt" "SRR2531538.txt"
## [25] "SRR2531539.txt" "SRR2531540.txt" "SRR2531541.txt" "SRR2531542.txt"
## [29] "SRR2531543.txt" "SRR2531544.txt" "SRR2531545.txt" "SRR2531546.txt"
## [33] "SRR2531547.txt" "SRR2531548.txt" "SRR2531549.txt" "SRR2531550.txt"
## [37] "SRR2531551.txt" "SRR2531552.txt" "SRR2531553.txt" "SRR2531554.txt"
## [41] "SRR2531555.txt" "SRR3734796.txt" "SRR3734797.txt" "SRR3734798.txt"
## [45] "SRR3734799.txt" "SRR3734800.txt" "SRR3734801.txt" "SRR3734802.txt"
## [49] "SRR3734803.txt" "SRR3734804.txt" "SRR3734805.txt" "SRR3734806.txt"
## [53] "SRR3734807.txt" "SRR3734808.txt" "SRR3734809.txt" "SRR3734810.txt"
## [57] "SRR3734811.txt" "SRR3734812.txt" "SRR3734813.txt" "SRR3734814.txt"
## [61] "SRR3734815.txt" "SRR3734816.txt" "SRR3734817.txt" "SRR3734818.txt"
## [65] "SRR3734819.txt" "SRR3734820.txt" "SRR3734821.txt" "SRR3734822.txt"
## [69] "SRR3734823.txt" "SRR3734824.txt" "SRR3734825.txt"
setwd(wd)
setwd("./InputHtseqCount")
#Given that the above files were imported properly as shown above, lets read the entire list of files into a single object
readhtseqcountfiles = lapply(htseqcountfiles, read.table, sep="\t",fill=TRUE)
setwd(wd)
#Now that all the files in the list have been 'read' we can merge them by 'V1' column which has gene symbols
htseqCount <- Reduce(function(x, y) {
    merge(x, y, all=TRUE, by="V1")
}, readhtseqcountfiles)
#Removal of extra htseqcount information
htseqCount1=htseqCount[6:53677,]

#Gene names to row names
rownames(htseqCount1)=htseqCount1$V1
htseqCount1$V1=NULL

#Sample names as colnames
htseqCount2 <- htseqCount1
colnames(htseqCount2)<-htseqcountfiles
colnames(htseqCount2) <- gsub(".txt","",colnames(htseqCount2))
#Here we just eyeball and make sure that the order of the samples here is same as the order of the input sample htseqcount files in the first code chunk i.e. 'htseqcountfiles <- list.files(pattern = "*.txt")'
htseqcountfiles
##  [1] "SRR1593496.txt" "SRR1593497.txt" "SRR1593498.txt" "SRR1593499.txt"
##  [5] "SRR1593500.txt" "SRR1593501.txt" "SRR1593502.txt" "SRR1593503.txt"
##  [9] "SRR1593504.txt" "SRR1593505.txt" "SRR1593506.txt" "SRR1593507.txt"
## [13] "SRR1593508.txt" "SRR1593509.txt" "SRR1593510.txt" "SRR1593511.txt"
## [17] "SRR1593512.txt" "SRR2531532.txt" "SRR2531533.txt" "SRR2531534.txt"
## [21] "SRR2531535.txt" "SRR2531536.txt" "SRR2531537.txt" "SRR2531538.txt"
## [25] "SRR2531539.txt" "SRR2531540.txt" "SRR2531541.txt" "SRR2531542.txt"
## [29] "SRR2531543.txt" "SRR2531544.txt" "SRR2531545.txt" "SRR2531546.txt"
## [33] "SRR2531547.txt" "SRR2531548.txt" "SRR2531549.txt" "SRR2531550.txt"
## [37] "SRR2531551.txt" "SRR2531552.txt" "SRR2531553.txt" "SRR2531554.txt"
## [41] "SRR2531555.txt" "SRR3734796.txt" "SRR3734797.txt" "SRR3734798.txt"
## [45] "SRR3734799.txt" "SRR3734800.txt" "SRR3734801.txt" "SRR3734802.txt"
## [49] "SRR3734803.txt" "SRR3734804.txt" "SRR3734805.txt" "SRR3734806.txt"
## [53] "SRR3734807.txt" "SRR3734808.txt" "SRR3734809.txt" "SRR3734810.txt"
## [57] "SRR3734811.txt" "SRR3734812.txt" "SRR3734813.txt" "SRR3734814.txt"
## [61] "SRR3734815.txt" "SRR3734816.txt" "SRR3734817.txt" "SRR3734818.txt"
## [65] "SRR3734819.txt" "SRR3734820.txt" "SRR3734821.txt" "SRR3734822.txt"
## [69] "SRR3734823.txt" "SRR3734824.txt" "SRR3734825.txt"
colnames(htseqCount2)
##  [1] "SRR1593496" "SRR1593497" "SRR1593498" "SRR1593499" "SRR1593500"
##  [6] "SRR1593501" "SRR1593502" "SRR1593503" "SRR1593504" "SRR1593505"
## [11] "SRR1593506" "SRR1593507" "SRR1593508" "SRR1593509" "SRR1593510"
## [16] "SRR1593511" "SRR1593512" "SRR2531532" "SRR2531533" "SRR2531534"
## [21] "SRR2531535" "SRR2531536" "SRR2531537" "SRR2531538" "SRR2531539"
## [26] "SRR2531540" "SRR2531541" "SRR2531542" "SRR2531543" "SRR2531544"
## [31] "SRR2531545" "SRR2531546" "SRR2531547" "SRR2531548" "SRR2531549"
## [36] "SRR2531550" "SRR2531551" "SRR2531552" "SRR2531553" "SRR2531554"
## [41] "SRR2531555" "SRR3734796" "SRR3734797" "SRR3734798" "SRR3734799"
## [46] "SRR3734800" "SRR3734801" "SRR3734802" "SRR3734803" "SRR3734804"
## [51] "SRR3734805" "SRR3734806" "SRR3734807" "SRR3734808" "SRR3734809"
## [56] "SRR3734810" "SRR3734811" "SRR3734812" "SRR3734813" "SRR3734814"
## [61] "SRR3734815" "SRR3734816" "SRR3734817" "SRR3734818" "SRR3734819"
## [66] "SRR3734820" "SRR3734821" "SRR3734822" "SRR3734823" "SRR3734824"
## [71] "SRR3734825"
#72 columns i.e. 71 samples + 1 column of genes, number of rows is the total number of gene annotations
dim(htseqCount2)
## [1] 53672    71
#Also can check that this is correct by looking at the first gene expression here '0610005C13Rik' in the individual htseqcount files SRR1593496.txt and SRR1593497.txt. I checked and it looks ok.
DT::datatable(head(htseqCount2))

Step3: Matching GSM or SRR number and replacing Sample_names of choice from merged metadata (same order as merging of expression data above)

Also incuded is a match column step in this code to ensure match

Before importing the metadate file, decide and include a column of sample names of your liking and the Study or batch information column you have.

The GSE33000_metadata.csv rows are followed by GSE43490_metadata.csv to make merged ‘merge_GSE33000_GSE43490_metadata.csv’

merge_metadata=read.csv('./InputMsAge/GSE61915_GSE73503_GSE83931_metadata_edit.csv', header =T, sep=',')
DT::datatable(head(merge_metadata))
#Number of rows of above metadata is same as number of columns of Expr 
dim(htseqCount2)
## [1] 53672    71
dim(merge_metadata)
## [1] 71  6
#replace the column names in expression with sample names
colnames(htseqCount2) <- merge_metadata$Sample_name[match(colnames(htseqCount2), merge_metadata$Run)]
DT::datatable(head(htseqCount2))
#Also can check that this is correct by looking at the first gene expression here '0610005C13Rik' in the individual htseqcount files SRR1593496.txt and SRR1593497.txt. I checked and it looks ok.
#Lets drop the Run number from the metadata now
merge_metadata=merge_metadata[,-c(1)]
DT::datatable(head(merge_metadata))

Step4: Will normalize RNA-seq library size differences using cpm (counts per million) edgeR function and convert to log2+1 space

#edgeR recommends to remove genes without atleast 1 cpm in >= no. of samples in minimum sample group. For us it is 3 for 29 months old mouse sample
Keep<-rowSums(cpm(htseqCount2) > 1) >= 3
htseqCount2_keep<-htseqCount2[Keep, ]
dim(htseqCount2)
## [1] 53672    71
dim(htseqCount2_keep) 
## [1] 16769    71
#We remove 53K-16K=37K low genes
#cpm normalize for library size differences. Alternatives are RPKM or TPM normalization. 
htseqCount2_keepcpm=cpm(htseqCount2_keep, normalized.lib.sizes=TRUE)
DT::datatable(htseqCount2_keepcpm[1:7,1:7])
#log2+1 transform. Our Human array data is also in log2+1 space so now RNA-seq mouse will be consistent with that
htseqCount2_cpmlog=log2(htseqCount2_keepcpm+1)
DT::datatable(htseqCount2_cpmlog[1:7,1:7])

#4.1 Check for negative values. Negative values will not work in WGCNA

exprRaw_ifneg<-apply(htseqCount2, 1, function(row) any(row <0))
length(which(exprRaw_ifneg)) #what is the length of negative numbers
## [1] 0
exprRaw1_ifneg<-apply(htseqCount2_keep, 1, function(row) any(row <0))
length(which(exprRaw1_ifneg)) #what is the length of negative numbers
## [1] 0
exprCpm_ifneg<-apply(htseqCount2_keepcpm, 1, function(row) any(row <0))
length(which(exprCpm_ifneg)) #what is the length of negative numbers
## [1] 0
exprCpmLog_ifneg<-apply(htseqCount2_cpmlog, 1, function(row) any(row <0))
length(which(exprCpmLog_ifneg)) #what is the length of negative numbers
## [1] 0
#0 means no negative values 

#4.2 Check for max and min values. Ususally don’t want max to be <100

exprRaw_max<-which.max(as.matrix(htseqCount2))
exprRaw_max
## [1] 1705292
exprRaw_min<-which.min(as.matrix(htseqCount2))
exprRaw_min
## [1] 2
exprRaw1_max<-which.max(as.matrix(htseqCount2_keep))
exprRaw1_max
## [1] 529693
exprRaw1_min<-which.min(as.matrix(htseqCount2_keep))
exprRaw1_min
## [1] 183
exprCpm_max<-which.max(as.matrix(htseqCount2_keepcpm))
exprCpm_max
## [1] 378772
exprCpm_min<-which.min(as.matrix(htseqCount2_keepcpm))
exprCpm_min
## [1] 183
exprCpmLog_max<-which.max(as.matrix(htseqCount2_cpmlog))
exprCpmLog_max
## [1] 378772
exprCpmLog_min<-which.min(as.matrix(htseqCount2_cpmlog))
exprCpmLog_min
## [1] 183

#4.3 Descriptive or summary statistics of the data

DT::datatable(summary(htseqCount2))
DT::datatable(summary(htseqCount2_keep))
DT::datatable(summary(htseqCount2_keepcpm))
DT::datatable(summary(htseqCount2_cpmlog))

#4.4 Visualization of merged GSE61915 GSE73503 and GSE83931

pdf(file="Visualization_GSE61915_GSE73503_GSE83931_rawdata_transformation.pdf",height=5,width=5)
par(mfrow=c(2,2)) #plots are big, so put one per page

#exprRaw boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(htseqCount2, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprRaw1 boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(htseqCount2_keep, outline=FALSE, las=2, cex=0.25, main="exprRaw", col="yellow")

#exprCpm boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(htseqCount2_keepcpm, outline=FALSE, las=2, cex=0.25, main="exprCpm", col="yellow")

#exprCpmLog boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(htseqCount2_cpmlog, outline=FALSE, las=2, cex=0.25, main="exprCpmLog", col="yellow")

dev.off()
## quartz_off_screen 
##                 2

Step5: we can export the cpmlog Expression data and Metadata with Sample_name for SVA LM analysis

write.csv(htseqCount2_cpmlog,"merge_GSE61915_GSE73503_GSE83931_Expr_GeneMs_SampleID.csv")
write.csv(merge_metadata,"merge_GSE61915_GSE73503_GSE83931_metadata_SampleID.csv")
#The files we needed are saved so we now clear workspace and delete files and folders that are not needed 
save.image(file="preSVALM_temp.RData")
rm(list=ls())
gc()
##           used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  827533 44.2    1467998  78.4         NA  1467998  78.4
## Vcells 1532134 11.7   27065806 206.5     102400 33830409 258.2

SVA+LM normalization

Checklist for running this pipeline:

Step6: Load libraries, set woking directory and Import data

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/MsAge11-30-18"
#source("https://bioconductor.org/biocLite.R")
#biocLite(c("sva","limma","bladderbatch","pamr"))
library(limma)
library(sva)
library(pamr)
library(ggplot2) # plot
library(ggmap)#plots
library(gplots)#plots
library(RColorBrewer)#color pallet
library(Hmisc)#for corelation plot
library(viridis)#for corelation plot
library(corrplot)#for corelation plot
#Metadata_coded
metadata_coded=read.csv("./InputMsAge/merge_GSE61915_GSE73503_GSE83931_metadata_SampleID_coded.csv", header=T, sep=',')
#Expression data
Expr_MsGene=read.csv("merge_GSE61915_GSE73503_GSE83931_Expr_GeneMs_SampleID.csv", header =T, sep=',')
#Rename the gene symbol column from X to GeneMs
colnames(Expr_MsGene)[colnames(Expr_MsGene) == 'X'] <- 'GeneMs'
dim(metadata_coded)
## [1] 71  6
dim(Expr_MsGene)
## [1] 16769    72
#Need to make sure that the Gene IDs column labeled as GeneMs are unique to specify rownames with Gene IDs
Expr_MsGene = Expr_MsGene[!duplicated(Expr_MsGene$GeneMs),]
dim(metadata_coded)
## [1] 71  6
dim(Expr_MsGene)
## [1] 16769    72
#We lost no Gene IDs as they are no duplicates in RNA-seq unline microarray
Expr_MsGene_1=Expr_MsGene[,-1] #get rid of extra column
rownames(Expr_MsGene_1)=Expr_MsGene$GeneMs 
Expr_MsGene_1=Expr_MsGene_1[complete.cases(Expr_MsGene_1), ]
#Filter low counts or else svaobj step compalins about missing values and all zero rows
Expr_MsGene_2=Expr_MsGene_1[rowSums(Expr_MsGene_1)>0,]
edata=Expr_MsGene_2
metadata_coded_1=metadata_coded[,3:5]
rownames(metadata_coded_1)=metadata_coded$Sample_name
pheno=metadata_coded_1
DT::datatable(pheno)
DT::datatable(edata[1:7,1:7])
dim(metadata_coded)
## [1] 71  6
dim(pheno)
## [1] 71  3
dim(Expr_MsGene)
## [1] 16769    72
dim(edata)
## [1] 16769    71
#Make sure we did not lose any samples in expression data
#Here in the output the 1st two and the last two should be same i.e. the first and last samples are same
colnames(Expr_MsGene[2])
## [1] "MF_3_SRR1593496"
colnames(edata[1])
## [1] "MF_3_SRR1593496"
colnames(Expr_MsGene[72])
## [1] "M_4_SRR3734825"
colnames(edata[71])
## [1] "M_4_SRR3734825"

Step7: Preparing models for SVA (Surrogate variable adjustment)

#full model matrix, variable of interest and other variables
#Put categorical variables ahead of numeric variables 
#For this dataset there is only one Tissue type so its not included in the model
mod = model.matrix(~Study+Gender+Age, data=pheno) 
#null model matrix all except the variable of interest 
#To include only an intercept use mode 'mod0 = model.matrix(~1,data=pheno)'
#Put categorical variables ahead of numeric variables
mod0 = model.matrix(~Study+Gender,data=pheno)

Step8: Running SVA (Surrogate variable adjustment)

#Estimation of Surrogate variable
n.sv = num.sv(as.matrix(edata),mod,method="be") 
svobj = sva(as.matrix(edata),mod,mod0,n.sv=n.sv, B=20)
## Number of significant surrogate variables is:  8 
## Iteration (out of 20 ):1  2  3  4  5  6  7  8  9  10  11  12  13  14  15  16  17  18  19  20

sva function returns a list of 4 components: 1) sv= matrix with columns corrosponding to estimate of surrogate variables 2) pprob.gam=probability that each gene is associated with one or more latent variables 3) pprob.b=probability that each gene is associated with our variable of interest 4) n.sv=number of surrogate variables

Step9: Clean SVA adjusted expression data with LM regression

#Note here the column which we care about is not used pheno[c(-?)], Age in 3rd column
pheno_sva=cbind(pheno[c(-3)],svobj$sv)
reglm_sva<-lapply(1:nrow(edata), function(x){lm(unlist(edata[x,])~.,data=pheno_sva)})
DT::datatable(pheno_sva)
residuals<-lapply(reglm_sva, function(x)residuals(summary(x)))
residuals<-do.call(rbind, residuals)
edata_adjresiduals<-residuals+matrix(apply(edata, 1, mean), nrow=nrow(residuals), ncol=ncol(residuals))
rownames(edata_adjresiduals)=rownames(edata)
rownames(residuals)=rownames(edata)
DT::datatable(edata_adjresiduals[1:7,1:7])
dim(edata_adjresiduals)
## [1] 16769    71
write.csv(edata_adjresiduals, "edata_adjresiduals.csv")
#Save .RData and clear environment to free up memory
save.image(file="temp.RData")
rm(list=ls())
gc()
##           used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells 3857352 206.1    9912070 529.4         NA  9912070 529.4
## Vcells 6766664  51.7   52283984 398.9     102400 60608265 462.5
#This 'reglm_sva' is a big object so we remove it as we have saved the other variables we need 
load(file="temp.RData")
rm(reglm_sva)
gc() 
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  3874792 207.0   13091016 699.2         NA  9912070 529.4
## Vcells 14075309 107.4   51861685 395.7     102400 60608265 462.5

Step10: Visualization of before and after SVA and LM regression

Before SVA and LM adjustment

edata #After SVA and LM adjustment edata_adjresiduals

#Summary Statistics edata
DT::datatable(summary(edata[1:7,1:7]))
write.csv(summary(edata),"summary_stats_edata.csv")
#Summary Statistics edata_adjresiduals
DT::datatable(summary(edata_adjresiduals[1:7,1:7]))
write.csv(summary(edata_adjresiduals),"summary_stats_edata_adjresiduals.csv")
pdf(file="Visualization_batch_before_and_after_SVA-LM.pdf",height=10,width=10)
par(mfrow=c(2,2))

#Before Correlation
# Colorbar along the heatmap represents Study or batch
nconditions <- nlevels(as.factor(pheno$Study))
npal <- colorRampPalette(brewer.pal(nconditions, "Set1"))(nconditions)
condition_colors <- npal[as.integer(pheno$Study)]
heatmap.2(cor(edata), RowSideColors=condition_colors,
          trace='none', cexRow=0.5, main='Sample correlations before SVA LM Study')

#After Correlation
# Colorbar along the heatmap represents Study
nconditions <- nlevels(as.factor(pheno$Study))
npal <- colorRampPalette(brewer.pal(nconditions, "Set1"))(nconditions)
condition_colors <- npal[as.integer(pheno$Study)]
heatmap.2(cor(edata_adjresiduals), RowSideColors=condition_colors,
          trace='none',  cexRow=0.5, main='Sample correlations after SVA LM Study')

#Before boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(edata, outline=FALSE, las=2, cex=0.25, main="before SVA LM Study", col="yellow")

#After boxplot
par(mai=c(1,0.8,1,0.8))
boxplot(edata_adjresiduals, outline=FALSE, las=2, cex=0.25, main="after SVA LM Study", col="yellow")

#Before MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(edata, col=condition_colors, legend= "all", main="before SVA LM Study", cex=0.5)#like PCA plot

#After MDSplot, colored by Study
par(mai=c(1,0.8,1,0.8))
plotMDS(edata_adjresiduals, col=condition_colors, legend= "all", main="after SVA LM Study", cex=0.5)#like PCA plot

#Before PCAplot, colored by Study or batch
pca0=prcomp(t(edata), center=T, scale=T)
names(pca0)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
#summary(pca0)
pca0$x[1:3,1:3]
##                       PC1       PC2        PC3
## MF_3_SRR1593496 -62.87600 120.25801 -28.043048
## MF_3_SRR1593497 -72.81853  90.57949 -17.385049
## MF_3_SRR1593498 -74.72239 113.71426   4.161144
plot(x=pca0$x[,1], y=pca0$x[,2],
     cex=0,
     xlab="PC1", ylab="PC2",
     main="PC plot colored by Study or batch before SVA LM",
     font=2
)
text(x=pca0$x[,1], y=pca0$x[,2],
     labels=pheno$Age,
     col=pheno$Study,
     cex=0.7, font=2)
legend("bottomright",
       legend=c("GSE61915", "GSE73517", "GSE83931"),
       text.col=c("red", "blue", "green")
)

#After PCAplot, colored by Study or batch
pca0=prcomp(t(edata_adjresiduals), center=T, scale=T)
names(pca0)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
#summary(pca0)
pca0$x[1:3,1:3]
##                        PC1       PC2         PC3
## MF_3_SRR1593496 -11.484726  3.801784 -17.7970167
## MF_3_SRR1593497  -9.777392 19.249424 -13.5698688
## MF_3_SRR1593498   1.930436 55.675007  -0.9632216
plot(x=pca0$x[,1], y=pca0$x[,2],
     cex=0,
     xlab="PC1", ylab="PC2",
     main="PC plot colored by Study or batch after SVA LM",
     font=2
)
text(x=pca0$x[,1], y=pca0$x[,2],
     labels=pheno$Age,
     col=pheno$Study,
     cex=0.7, font=2)
legend("bottomright",
       legend=c("GSE61915", "GSE73517", "GSE83931"),
       text.col=c("red", "blue", "green")
)

dev.off()
## quartz_off_screen 
##                 2

Step11: we can export the Expression data for WGCNA

#Export edata_adjustedresidual SVA and LM normalized data for WGCNA
write.csv(edata_adjresiduals, "MsAgeInterest_edata_adjresiduals_GeneID.csv")
#save image
save.image(file="SVALM_temp.RData")

WGCNA top variable 7301 genes same number (not same genes necesarrly) as human genes

Step12: Prepation step: Converting the mouse gene symbols to human gene symbols

#Load required libraries
library(Rcpp)
library(stringi)
library(biomaRt)
#The useMart() function can now be used to connect to a specified BioMart database, this must be a valid name given by listMarts(). In the next example we choose to query the Ensembl BioMart database.
listMarts()
##                biomart               version
## 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 94
## 2   ENSEMBL_MART_MOUSE      Mouse strains 94
## 3     ENSEMBL_MART_SNP  Ensembl Variation 94
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 94

This additional step converts mouse gene symbols of mouse aging data to human gene symbols. We need to do this because the disease data we want to compare with is human

Expr_MsGene=read.csv("MsAgeInterest_edata_adjresiduals_GeneID.csv", header =T, sep=',')
ensembl=useMart("ensembl")
listDatasets(ensembl)
##                            dataset
## 1         acalliptera_gene_ensembl
## 2       acarolinensis_gene_ensembl
## 3        acitrinellus_gene_ensembl
## 4        amelanoleuca_gene_ensembl
## 5          amexicanus_gene_ensembl
## 6          anancymaae_gene_ensembl
## 7          aocellaris_gene_ensembl
## 8            apercula_gene_ensembl
## 9      aplatyrhynchos_gene_ensembl
## 10      apolyacanthus_gene_ensembl
## 11       atestudineus_gene_ensembl
## 12            btaurus_gene_ensembl
## 13            caperea_gene_ensembl
## 14              catys_gene_ensembl
## 15         ccapucinus_gene_ensembl
## 16         cchok1gshd_gene_ensembl
## 17            ccrigri_gene_ensembl
## 18           celegans_gene_ensembl
## 19        cfamiliaris_gene_ensembl
## 20            chircus_gene_ensembl
## 21         choffmanni_gene_ensembl
## 22      cintestinalis_gene_ensembl
## 23           cjacchus_gene_ensembl
## 24          clanigera_gene_ensembl
## 25         cpalliatus_gene_ensembl
## 26         cporcellus_gene_ensembl
## 27           csabaeus_gene_ensembl
## 28          csavignyi_gene_ensembl
## 29        csemilaevis_gene_ensembl
## 30          csyrichta_gene_ensembl
## 31        cvariegatus_gene_ensembl
## 32      dmelanogaster_gene_ensembl
## 33      dnovemcinctus_gene_ensembl
## 34             dordii_gene_ensembl
## 35             drerio_gene_ensembl
## 36           eburgeri_gene_ensembl
## 37          ecaballus_gene_ensembl
## 38         eeuropaeus_gene_ensembl
## 39            elucius_gene_ensembl
## 40          etelfairi_gene_ensembl
## 41        falbicollis_gene_ensembl
## 42             fcatus_gene_ensembl
## 43        fdamarensis_gene_ensembl
## 44      fheteroclitus_gene_ensembl
## 45         gaculeatus_gene_ensembl
## 46           gaffinis_gene_ensembl
## 47            ggallus_gene_ensembl
## 48           ggorilla_gene_ensembl
## 49            gmorhua_gene_ensembl
## 50           hburtoni_gene_ensembl
## 51             hcomes_gene_ensembl
## 52            hfemale_gene_ensembl
## 53              hmale_gene_ensembl
## 54           hsapiens_gene_ensembl
## 55         ipunctatus_gene_ensembl
## 56  itridecemlineatus_gene_ensembl
## 57           jjaculus_gene_ensembl
## 58        kmarmoratus_gene_ensembl
## 59          lafricana_gene_ensembl
## 60          lbergylta_gene_ensembl
## 61         lchalumnae_gene_ensembl
## 62          loculatus_gene_ensembl
## 63             malbus_gene_ensembl
## 64           marmatus_gene_ensembl
## 65           mauratus_gene_ensembl
## 66            mcaroli_gene_ensembl
## 67         mdomestica_gene_ensembl
## 68      mfascicularis_gene_ensembl
## 69              mfuro_gene_ensembl
## 70         mgallopavo_gene_ensembl
## 71       mleucophaeus_gene_ensembl
## 72         mlucifugus_gene_ensembl
## 73              mmola_gene_ensembl
## 74           mmulatta_gene_ensembl
## 75           mmurinus_gene_ensembl
## 76          mmusculus_gene_ensembl
## 77        mnemestrina_gene_ensembl
## 78       mochrogaster_gene_ensembl
## 79            mpahari_gene_ensembl
## 80           mspretus_gene_ensembl
## 81             mzebra_gene_ensembl
## 82         nbrichardi_gene_ensembl
## 83           neugenii_gene_ensembl
## 84            ngalili_gene_ensembl
## 85        nleucogenys_gene_ensembl
## 86          oanatinus_gene_ensembl
## 87             oaries_gene_ensembl
## 88         ocuniculus_gene_ensembl
## 89             odegus_gene_ensembl
## 90         ogarnettii_gene_ensembl
## 91               ohni_gene_ensembl
## 92              ohsok_gene_ensembl
## 93           olatipes_gene_ensembl
## 94        omelastigma_gene_ensembl
## 95         oniloticus_gene_ensembl
## 96          oprinceps_gene_ensembl
## 97            pabelii_gene_ensembl
## 98           paltaica_gene_ensembl
## 99            panubis_gene_ensembl
## 100          pbairdii_gene_ensembl
## 101         pcapensis_gene_ensembl
## 102        pcoquereli_gene_ensembl
## 103          pformosa_gene_ensembl
## 104       pkingsleyae_gene_ensembl
## 105        platipinna_gene_ensembl
## 106   pmagnuspinnatus_gene_ensembl
## 107          pmarinus_gene_ensembl
## 108         pmexicana_gene_ensembl
## 109        pnattereri_gene_ensembl
## 110         pnyererei_gene_ensembl
## 111         ppaniscus_gene_ensembl
## 112           ppardus_gene_ensembl
## 113       preticulata_gene_ensembl
## 114         psinensis_gene_ensembl
## 115      ptroglodytes_gene_ensembl
## 116         pvampyrus_gene_ensembl
## 117            rbieti_gene_ensembl
## 118       rnorvegicus_gene_ensembl
## 119        rroxellana_gene_ensembl
## 120          saraneus_gene_ensembl
## 121      sboliviensis_gene_ensembl
## 122       scerevisiae_gene_ensembl
## 123         sdorsalis_gene_ensembl
## 124         sdumerili_gene_ensembl
## 125         sformosus_gene_ensembl
## 126         sharrisii_gene_ensembl
## 127          smaximus_gene_ensembl
## 128         spartitus_gene_ensembl
## 129           sscrofa_gene_ensembl
## 130        tbelangeri_gene_ensembl
## 131          tguttata_gene_ensembl
## 132     tnigroviridis_gene_ensembl
## 133         trubripes_gene_ensembl
## 134        ttruncatus_gene_ensembl
## 135            vpacos_gene_ensembl
## 136       xcouchianus_gene_ensembl
## 137        xmaculatus_gene_ensembl
## 138       xtropicalis_gene_ensembl
##                                                      description
## 1                               Eastern happy genes (fAstCal1.2)
## 2                                 Anole lizard genes (AnoCar2.0)
## 3                                 Midas cichlid genes (Midas_v5)
## 4                                          Panda genes (ailMel1)
## 5                       Cave fish genes (Astyanax_mexicanus-2.0)
## 6                             Ma's night monkey genes (Anan_2.0)
## 7                            Clown anemonefish genes (AmpOce1.0)
## 8                               Orange clownfish genes (Nemo_v1)
## 9                                      Duck genes (BGI_duck_1.0)
## 10                             Spiny chromis genes (ASM210954v1)
## 11                             Climbing perch genes (fAnaTes1.1)
## 12                                            Cow genes (UMD3.1)
## 13                         Brazilian guinea pig genes (CavAp1.0)
## 14                               Sooty mangabey genes (Caty_1.0)
## 15                           Capuchin genes (Cebus_imitator-1.0)
## 16                  Chinese hamster CHOK1GS genes (CHOK1GS_HDv1)
## 17                     Chinese hamster CriGri genes (CriGri_1.0)
## 18                       Caenorhabditis elegans genes (WBcel235)
## 19                                         Dog genes (CanFam3.1)
## 20                                             Goat genes (ARS1)
## 21                                         Sloth genes (choHof1)
## 22                                     C.intestinalis genes (KH)
## 23                                  Marmoset genes (ASM275486v1)
## 24                      Long-tailed chinchilla genes (ChiLan1.0)
## 25                            Angola colobus genes (Cang.pa_1.0)
## 26                                  Guinea Pig genes (Cavpor3.0)
## 27                                  Vervet-AGM genes (ChlSab1.1)
## 28                                   C.savignyi genes (CSAV 2.0)
## 29                                  Tongue sole genes (Cse_v1.0)
## 30                        Tarsier genes (Tarsius_syrichta-2.0.1)
## 31                    Sheepshead minnow genes (C_variegatus-1.0)
## 32                                        Fruitfly genes (BDGP6)
## 33                                   Armadillo genes (Dasnov3.0)
## 34                                 Kangaroo rat genes (Dord_2.0)
## 35                                      Zebrafish genes (GRCz11)
## 36                                  Hagfish genes (Eburgeri_3.2)
## 37                                       Horse genes (Equ Cab 2)
## 38                                      Hedgehog genes (eriEur1)
## 39                                 Northern pike genes (Eluc_V3)
## 40                         Lesser hedgehog tenrec genes (TENREC)
## 41                                 Flycatcher genes (FicAlb_1.4)
## 42                                   Cat genes (Felis_catus_9.0)
## 43                              Damara mole rat genes (DMR_v1.0)
## 44                 Mummichog genes (Fundulus_heteroclitus-3.0.2)
## 45                                  Stickleback genes (BROAD S1)
## 46                      Western mosquitofish genes (ASM309773v1)
## 47                             Chicken genes (Gallus_gallus-5.0)
## 48                                       Gorilla genes (gorGor4)
## 49                                           Cod genes (gadMor1)
## 50                       Burton's mouthbrooder genes (AstBur1.0)
## 51                    Tiger tail seahorse genes (H_comes_QL1_v1)
## 52               Naked mole-rat female genes (HetGla_female_1.0)
## 53                        Naked mole-rat male genes (HetGla_1.0)
## 54                                      Human genes (GRCh38.p12)
## 55                            Channel catfish genes (IpCoco_1.2)
## 56                                    Squirrel genes (SpeTri2.0)
## 57                      Lesser Egyptian jerboa genes (JacJac1.0)
## 58                          Mangrove rivulus genes (ASM164957v1)
## 59                                    Elephant genes (Loxafr3.0)
## 60                              Ballan wrasse genes (BallGen_V1)
## 61                                    Coelacanth genes (LatCha1)
## 62                                   Spotted gar genes (LepOcu1)
## 63                                 Swamp eel genes (M_albus_1.0)
## 64                                Zig-zag eel genes (fMasArm1.1)
## 65                              Golden Hamster genes (MesAur1.0)
## 66                          Ryukyu mouse genes (CAROLI_EIJ_v1.1)
## 67                                       Opossum genes (monDom5)
## 68           Crab-eating macaque genes (Macaca_fascicularis_5.0)
## 69                                   Ferret genes (MusPutFur1.0)
## 70                                    Turkey genes (Turkey_2.01)
## 71                                     Drill genes (Mleu.le_1.0)
## 72                                    Microbat genes (Myoluc2.0)
## 73                             Ocean sunfish genes (ASM169857v1)
## 74                                    Macaque genes (Mmul_8.0.1)
## 75                                  Mouse Lemur genes (Mmur_3.0)
## 76                                       Mouse genes (GRCm38.p6)
## 77                           Pig-tailed macaque genes (Mnem_1.0)
## 78                                Prairie vole genes (MicOch1.0)
## 79                           Shrew mouse genes (PAHARI_EIJ_v1.1)
## 80                           Algerian mouse genes (SPRET_EiJ_v1)
## 81                             Zebra mbuna genes (M_zebra_UMD2a)
## 82                            Lyretail cichlid genes (NeoBri1.0)
## 83                                      Wallaby genes (Meug_1.0)
## 84  Upper Galilee mountains blind mole rat genes (S.galili_v1.0)
## 85                                       Gibbon genes (Nleu_3.0)
## 86                                        Platypus genes (OANA5)
## 87                                        Sheep genes (Oar_v3.1)
## 88                                      Rabbit genes (OryCun2.0)
## 89                                        Degu genes (OctDeg1.0)
## 90                                      Bushbaby genes (OtoGar3)
## 91                       Japanese Medaka HNI genes (ASM223471v1)
## 92                      Japanese Medaka HSOK genes (ASM223469v1)
## 93                      Japanese Medaka HdrR genes (ASM223467v1)
## 94                            Indian medaka genes (Om_v0.7.RACA)
## 95                                     Tilapia genes (Orenil1.0)
## 96                                    Pika genes (OchPri2.0-Ens)
## 97                                       Orangutan genes (PPYG2)
## 98                                       Tiger genes (PanTig1.0)
## 99                                 Olive baboon genes (Panu_3.0)
## 100                Northern American deer mouse genes (Pman_1.0)
## 101                                        Hyrax genes (proCap1)
## 102                           Coquerel's sifaka genes (Pcoq_1.0)
## 103                  Amazon molly genes (Poecilia_formosa-5.1.2)
## 104                  Paramormyrops kingsleyae genes (PKINGS_0.1)
## 105                        Sailfin molly genes (P_latipinna-1.0)
## 106                          Big-finned mudskipper genes (PM.fa)
## 107                                 Lamprey genes (Pmarinus_7.0)
## 108                        Shortfin molly genes (P_mexicana-1.0)
## 109      Red-bellied piranha genes (Pygocentrus_nattereri-1.0.2)
## 110                        Pundamilia nyererei genes (PunNye1.0)
## 111                                     Bonobo genes (panpan1.1)
## 112                                    Leopard genes (PanPar1.0)
## 113                            Guppy genes (Guppy_female_1.0_MT)
## 114                  Chinese softshell turtle genes (PelSin_1.0)
## 115                               Chimpanzee genes (Pan_tro_3.0)
## 116                                      Megabat genes (pteVam1)
## 117                  Black snub-nosed monkey genes (ASM169854v1)
## 118                                         Rat genes (Rnor_6.0)
## 119                     Golden snub-nosed monkey genes (Rrox_v1)
## 120                                        Shrew genes (sorAra1)
## 121                   Bolivian squirrel monkey genes (SaiBol1.0)
## 122                     Saccharomyces cerevisiae genes (R64-1-1)
## 123                          Yellowtail amberjack genes (Sedor1)
## 124                            Greater amberjack genes (Sdu_1.0)
## 125                         Asian bonytongue genes (ASM162426v1)
## 126                       Tasmanian devil genes (Devil_ref v7.0)
## 127                                   Turbot genes (ASM318616v1)
## 128          Bicolor damselfish genes (Stegastes_partitus-1.0.2)
## 129                                      Pig genes (Sscrofa11.1)
## 130                                   Tree Shrew genes (tupBel1)
## 131                              Zebra Finch genes (taeGut3.2.4)
## 132                              Tetraodon genes (TETRAODON 8.0)
## 133                                           Fugu genes (FUGU5)
## 134                                      Dolphin genes (turTru1)
## 135                                       Alpaca genes (vicPac1)
## 136     Monterrey platyfish genes (Xiphophorus_couchianus-4.0.1)
## 137                       Platyfish genes (X_maculatus-5.0-male)
## 138                                      Xenopus genes (JGI 4.2)
##                          version
## 1                     fAstCal1.2
## 2                      AnoCar2.0
## 3                       Midas_v5
## 4                        ailMel1
## 5         Astyanax_mexicanus-2.0
## 6                       Anan_2.0
## 7                      AmpOce1.0
## 8                        Nemo_v1
## 9                   BGI_duck_1.0
## 10                   ASM210954v1
## 11                    fAnaTes1.1
## 12                        UMD3.1
## 13                      CavAp1.0
## 14                      Caty_1.0
## 15            Cebus_imitator-1.0
## 16                  CHOK1GS_HDv1
## 17                    CriGri_1.0
## 18                      WBcel235
## 19                     CanFam3.1
## 20                          ARS1
## 21                       choHof1
## 22                            KH
## 23                   ASM275486v1
## 24                     ChiLan1.0
## 25                   Cang.pa_1.0
## 26                     Cavpor3.0
## 27                     ChlSab1.1
## 28                      CSAV 2.0
## 29                      Cse_v1.0
## 30        Tarsius_syrichta-2.0.1
## 31              C_variegatus-1.0
## 32                         BDGP6
## 33                     Dasnov3.0
## 34                      Dord_2.0
## 35                        GRCz11
## 36                  Eburgeri_3.2
## 37                     Equ Cab 2
## 38                       eriEur1
## 39                       Eluc_V3
## 40                        TENREC
## 41                    FicAlb_1.4
## 42               Felis_catus_9.0
## 43                      DMR_v1.0
## 44   Fundulus_heteroclitus-3.0.2
## 45                      BROAD S1
## 46                   ASM309773v1
## 47             Gallus_gallus-5.0
## 48                       gorGor4
## 49                       gadMor1
## 50                     AstBur1.0
## 51                H_comes_QL1_v1
## 52             HetGla_female_1.0
## 53                    HetGla_1.0
## 54                    GRCh38.p12
## 55                    IpCoco_1.2
## 56                     SpeTri2.0
## 57                     JacJac1.0
## 58                   ASM164957v1
## 59                     Loxafr3.0
## 60                    BallGen_V1
## 61                       LatCha1
## 62                       LepOcu1
## 63                   M_albus_1.0
## 64                    fMasArm1.1
## 65                     MesAur1.0
## 66               CAROLI_EIJ_v1.1
## 67                       monDom5
## 68       Macaca_fascicularis_5.0
## 69                  MusPutFur1.0
## 70                   Turkey_2.01
## 71                   Mleu.le_1.0
## 72                     Myoluc2.0
## 73                   ASM169857v1
## 74                    Mmul_8.0.1
## 75                      Mmur_3.0
## 76                     GRCm38.p6
## 77                      Mnem_1.0
## 78                     MicOch1.0
## 79               PAHARI_EIJ_v1.1
## 80                  SPRET_EiJ_v1
## 81                 M_zebra_UMD2a
## 82                     NeoBri1.0
## 83                      Meug_1.0
## 84                 S.galili_v1.0
## 85                      Nleu_3.0
## 86                         OANA5
## 87                      Oar_v3.1
## 88                     OryCun2.0
## 89                     OctDeg1.0
## 90                       OtoGar3
## 91                   ASM223471v1
## 92                   ASM223469v1
## 93                   ASM223467v1
## 94                  Om_v0.7.RACA
## 95                     Orenil1.0
## 96                 OchPri2.0-Ens
## 97                         PPYG2
## 98                     PanTig1.0
## 99                      Panu_3.0
## 100                     Pman_1.0
## 101                      proCap1
## 102                     Pcoq_1.0
## 103       Poecilia_formosa-5.1.2
## 104                   PKINGS_0.1
## 105              P_latipinna-1.0
## 106                        PM.fa
## 107                 Pmarinus_7.0
## 108               P_mexicana-1.0
## 109  Pygocentrus_nattereri-1.0.2
## 110                    PunNye1.0
## 111                    panpan1.1
## 112                    PanPar1.0
## 113          Guppy_female_1.0_MT
## 114                   PelSin_1.0
## 115                  Pan_tro_3.0
## 116                      pteVam1
## 117                  ASM169854v1
## 118                     Rnor_6.0
## 119                      Rrox_v1
## 120                      sorAra1
## 121                    SaiBol1.0
## 122                      R64-1-1
## 123                       Sedor1
## 124                      Sdu_1.0
## 125                  ASM162426v1
## 126               Devil_ref v7.0
## 127                  ASM318616v1
## 128     Stegastes_partitus-1.0.2
## 129                  Sscrofa11.1
## 130                      tupBel1
## 131                  taeGut3.2.4
## 132                TETRAODON 8.0
## 133                        FUGU5
## 134                      turTru1
## 135                      vicPac1
## 136 Xiphophorus_couchianus-4.0.1
## 137         X_maculatus-5.0-male
## 138                      JGI 4.2
mouse = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'mmusculus_gene_ensembl')
human = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'hsapiens_gene_ensembl')
myMap = getLDS(attributes = "mgi_symbol", filters = 'mgi_symbol',
    values = Expr_MsGene$X, mart = mouse, 
    attributesL = c("hgnc_symbol"), martL = human)
#Some genes will be duplicates (HLA), so let's make them unique.
myMap=myMap[!duplicated(myMap$HGNC.symbol), ]
#unique(myMap$HGNC.symbol)
myMap_new<-myMap
names(myMap_new) <- c("X", "HuGene")

#Now, lets merge the human gene symbols with the mouse gene symbol in 'Expr_MsGene' to get a single file with human and mouse gene symbols, and the other data in our input file
Expr_MsGeneHuGene<-merge(Expr_MsGene,myMap_new,by='X')
colnames(Expr_MsGeneHuGene)[colnames(Expr_MsGeneHuGene)=="X"] <- "MsGene"
DT::datatable(Expr_MsGeneHuGene[1:7,1:7])
#Next we want to retain only the human Genes
#Expr_MsGeneHuGene=Expr_MsGeneHuGene_1[!duplicated(Expr_MsGeneHuGene$HuGene), ]
Expr_HuGene=Expr_MsGeneHuGene[,-c(1,73)]
rownames(Expr_HuGene)=Expr_MsGeneHuGene$HuGene
Expr_HuGene=Expr_HuGene[complete.cases(Expr_HuGene), ]
DT::datatable(Expr_HuGene)
write.csv(Expr_HuGene,"MsGenetoHuGeneAgeInterest_adjresiduals_edata_GeneID.csv")

Step13: Load libraries, set working directory and Import data for WGCNA

#If restarting program uncomment line below
#load(file="temp.RData")

#save working directory location
wd<-getwd()
wd
## [1] "/Users/powermac/Desktop/MsAge11-30-18"
#Load additional functions required for this pipeline
#Reference: Miller, J.A., Horvath, S., and Geschwind, D.H. (2010). Divergence of Human and mouse brain transcriptome highlights Alzheimer disease pathways. Proceedings of the National Academy of Sciences of the United States of America 107, 12698-12703.

write.geneList <- function(PG, filename, allProbes=0, allGenes=0, probe="g")
{
## These functions write a genelist / probelist to a file of geneNames

## USER inputs
# PG = the probe/gene you want written to a gene list
# allProbes = the list of probe names for the above probes
# allGenes = the list of gene names for the corresponding probes
# filename = the filename (can include folder)
# probe = the default ("g") says PG is a gene and doesn''t need to be converted
#         to a gene.  Otherwise PG is assumed to be a probe and converted

gene = PG
if (probe!="g") {
  gene = probe2Gene(PG,allProbes,allGenes)
}
write(gene,filename,sep="\n")

}

cor.test.l = function(x){
## Performs a Pearson correlation on a vector of genes
 ct = cor.test(x,var)
 return(c(ct$est,ct$p.val))
}
#library(BiocInstaller)
#biocLite("qvalue")
#install.packages(c("impute","dynamicTreeCut","flashClust","Hmisc","WGCNA","stringi","enrichR","filesstrings"))
library(impute)
library(dynamicTreeCut)
library(qvalue)
library(flashClust)
library(Hmisc)
library(WGCNA)
## ==========================================================================
## *
## *  Package WGCNA 1.66 loaded.
## *
## *    Important note: It appears that your system supports multi-threading,
## *    but it is not enabled within WGCNA in R. 
## *    To allow multi-threading within WGCNA with all available cores, use 
## *
## *          allowWGCNAThreads()
## *
## *    within R. Use disableWGCNAThreads() to disable threading if necessary.
## *    Alternatively, set the following environment variable on your system:
## *
## *          ALLOW_WGCNA_THREADS=<number_of_processors>
## *
## *    for example 
## *
## *          ALLOW_WGCNA_THREADS=8
## *
## *    To set the environment variable in linux bash shell, type 
## *
## *           export ALLOW_WGCNA_THREADS=8
## *
## *     before running R. Other operating systems or shells will
## *     have a similar command to achieve the same aim.
## *
## ==========================================================================
library(stringi)
library(stringr)
library(enrichR)#for pathway analysis
options(stringsAsFactors = FALSE)
#Mouse Metadata
metadata=read.csv("merge_GSE61915_GSE73503_GSE83931_metadata_SampleID.csv", header=T, sep=',')
metadata_1=metadata[,3:6]
rownames(metadata_1)=metadata$Sample_name
pheno=metadata_1

#Mouse Metadata coded
metadata_coded=read.csv("./InputMsAge/merge_GSE61915_GSE73503_GSE83931_metadata_SampleID_coded.csv", header=T, sep=',')
metadata_1_coded=metadata_coded[,3:6]
rownames(metadata_1_coded)=metadata$Sample_name
pheno_coded=metadata_1_coded

#Mouse Expression data
Expr_HuGene=read.csv("MsGenetoHuGeneAgeInterest_adjresiduals_edata_GeneID.csv", header =T, sep=',')
Expr_HuGene_1=Expr_HuGene[,-1]
rownames(Expr_HuGene_1)=Expr_HuGene$X
Expr_HuGene_1=Expr_HuGene_1[complete.cases(Expr_HuGene_1), ]
edata=Expr_HuGene_1
#Mouse data
datExprA1g=edata
metadataA1g=pheno
metadataA1g_coded=pheno_coded
#Remove unused variables
rm(metadata)
rm(metadata_1)
rm(pheno)
rm(metadata_coded)
rm(metadata_1_coded)
rm(pheno_coded)
rm(Expr_HuGene)
rm(Expr_HuGene_1)
rm(edata)
DT::datatable(datExprA1g[1:7,1:7])
dim(datExprA1g)
## [1] 13668    71

Step14: Removal of outliers and missing values using WGCNA package function ‘goodSamplesGenes’

#Before applying 'goodSamplesGenes'
dim(datExprA1g)
## [1] 13668    71
#check and applying 'goodSamplesGenes' on data A1
gsg = goodSamplesGenes(datExprA1g, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gsg$allOK
## [1] TRUE
if (!gsg$allOK)
{
if (sum(!gsg$goodGenes)>0)
#printFlush(paste("Removing genes:", paste(names(datExprA1g)[!gsg$goodGenes], collapse = ", ")));
if (sum(!gsg$goodSamples)>0)
#printFlush(paste("Removing samples:", paste(rownames(datExprA1g)[!gsg$goodSamples], collapse = ", ")));
# Removal step
datExprA1g = datExprA1g[gsg$goodSamples, gsg$goodGenes]
}
#After applying 'goodSamplesGenes'
dim(datExprA1g)
## [1] 13668    71

Step 15: Optional: This step allows selection of top 7301 most variable genes, not most expressed genes (this minimizes noise and is compuationally efficient)

In the present analysis we have used 7301 genes which is same genes as was available in the human array WGCNA analysis

#Transpose data
datExprA1g1=as.data.frame(t(datExprA1g))
names(datExprA1g1)=rownames(datExprA1g)
rownames(datExprA1g1)=names(datExprA1g)
DT::datatable(datExprA1g1[1:7,1:7])
#Calculate variance
var = apply(datExprA1g1, 2, var)
dat = rbind(datExprA1g1,var)
rownames(dat) = c(rownames(datExprA1g1), "variance")
#order columns by variance
dat1 = dat[,order(dat["variance",], decreasing=T)]
#Remove row containing variance values
#Here uncomment to select all genes
#dat2 = dat1[1:dim(datExprA1g1)[1],1:dim(datExprA1g1)[2]]
#To select only a given number of top genes like top variable 7301 genes uncomment line below
dat2 = dat1[1:dim(datExprA1g1)[1],1:7301]

datExprA1g2=as.data.frame(t(dat2))
names(datExprA1g2)=rownames(dat2)
rownames(datExprA1g2)=names(dat2)
#remove unused variables
rm(var)

rm(dat1)
#Comparison of data A1 after variance based selction
dim(datExprA1g)
## [1] 13668    71
DT::datatable(datExprA1g[1:7,1:7])
dim(datExprA1g2)
## [1] 7301   71
DT::datatable(datExprA1g2[1:7,1:7])

here onwards will use datExprA1g2

Step16: Pick soft power and detect modules in data A1

#softPower estimate after 'goodSamplesgoodGenes' and variance based selection
#Pick power for the data A1
powers = c(c(1:10), seq(from = 12, to=40, by=2))
sftA1 = pickSoftThreshold(datExprA1g2, RsquaredCut=0.80, powerVector = powers, networkType="signed", moreNetworkConcepts=TRUE, verbose = 5, blockSize=5000)
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 71 of 71
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k. Density
## 1      1    0.162 -1300.0      -0.005710    69.5      69.6   69.8   0.994
## 2      2    0.162  -647.0      -0.006760    69.1      69.1   69.5   0.987
## 3      3    0.163  -431.0      -0.006000    68.6      68.7   69.3   0.981
## 4      4    0.162  -323.0      -0.006010    68.2      68.3   69.1   0.974
## 5      5    0.163  -259.0      -0.005250    67.8      67.9   68.8   0.968
## 6      6    0.163  -216.0      -0.004490    67.3      67.4   68.6   0.962
## 7      7    0.163  -185.0      -0.003730    66.9      67.0   68.4   0.956
## 8      8    0.163  -162.0      -0.003780    66.5      66.6   68.1   0.949
## 9      9    0.163  -144.0      -0.003020    66.0      66.2   67.9   0.943
## 10    10    0.163  -130.0       0.000439    65.6      65.8   67.7   0.937
## 11    12    0.163  -108.0       0.000971    64.8      65.0   67.2   0.925
## 12    14    0.163   -92.5       0.002500    63.9      64.2   66.8   0.913
## 13    16    0.164   -81.1       0.004020    63.1      63.4   66.3   0.902
## 14    18    0.164   -72.2       0.005530    62.3      62.6   65.9   0.890
## 15    20    0.165   -65.2       0.009140    61.5      61.8   65.5   0.879
## 16    22    0.174   -61.4       0.021500    60.7      61.1   65.0   0.867
## 17    24    0.175   -56.4       0.023000    59.9      60.3   64.6   0.856
## 18    26    0.175   -52.1       0.024500    59.2      59.6   64.2   0.845
## 19    28    0.174   -48.1       0.024300    58.4      58.8   63.7   0.835
## 20    30    0.175   -45.0       0.025900    57.7      58.1   63.3   0.824
## 21    32    0.178   -42.9       0.026700    56.9      57.4   62.9   0.814
## 22    34    0.178   -40.4       0.028200    56.2      56.7   62.5   0.803
## 23    36    0.179   -38.2       0.029700    55.5      56.0   62.1   0.793
## 24    38    0.179   -36.3       0.031200    54.8      55.3   61.7   0.783
## 25    40    0.179   -34.4       0.031700    54.1      54.7   61.3   0.773
##    Centralization Heterogeneity
## 1         0.00322       0.00128
## 2         0.00640       0.00255
## 3         0.00955       0.00383
## 4         0.01270       0.00510
## 5         0.01570       0.00638
## 6         0.01880       0.00765
## 7         0.02180       0.00892
## 8         0.02480       0.01020
## 9         0.02780       0.01150
## 10        0.03070       0.01270
## 11        0.03640       0.01530
## 12        0.04210       0.01780
## 13        0.04760       0.02030
## 14        0.05290       0.02280
## 15        0.05820       0.02540
## 16        0.06340       0.02790
## 17        0.06840       0.03040
## 18        0.07330       0.03290
## 19        0.07820       0.03540
## 20        0.08290       0.03790
## 21        0.08750       0.04040
## 22        0.09200       0.04290
## 23        0.09640       0.04540
## 24        0.10100       0.04780
## 25        0.10500       0.05030
par(mfrow = c(1,2));
cex1 = 0.9;
#scale-free topology fit index as function of soft-thresholding power
plot(sftA1$fitIndices[,1], -sign(sftA1$fitIndices[,3])*sftA1$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sftA1$fitIndices[,1], -sign(sftA1$fitIndices[,3])*sftA1$fitIndices[,2],
labels=powers,cex=cex1,col="red");
#this line corresponds to using R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sftA1$fitIndices[,1], sftA1$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sftA1$fitIndices[,1], sftA1$fitIndices[,5], labels=powers, cex=cex1,col="red")

dev.print(pdf, "A1_auto-power_plot.pdf",height=10,width=18)
## quartz_off_screen 
##                 2
#sftA1$powerEstimate is systems best guess
softPowerA1=sftA1$powerEstimate
softPowerA1
## [1] NA
#set soft power to value 12 for signed networks if above automatic detection detects low power.
softPowerA1=12
#Calculate weighted adjacency
adjacencyA1 = adjacency(t(datExprA1g2),power=softPowerA1,type="signed");
diag(adjacencyA1)=0
#Turn adjacency into topology overlap matrix
TOM=TOMsimilarity(adjacencyA1, TOMType="signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOMA1 = 1-TOM
#Hierarchial clustering based on TOM
geneTreeA1 = flashClust(as.dist(dissTOMA1), method="average")
#Display the networks visually:
#pdf("dendrogram.pdf",height=6,width=16)
#par(mar=c(1,1,1,1))
#par(mfrow=c(1,2))
plot(geneTreeA1,xlab="",sub="",main="Gene clustering on TOM-based dissimilarity (A1)",
labels=FALSE,hang=0.04);

#dev.off() 
dev.print(pdf,file="dendrogram.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
#Modules for dataset A1 or modulesA1
mColorh=NULL
mColorhL=NULL
for (ds in 0:3){
tree = cutreeHybrid(dendro = geneTreeA1, pamStage=TRUE,
minClusterSize = (30-3*ds), cutHeight = 0.99,
deepSplit = ds, distM = dissTOMA1)
mColorh=cbind(mColorh,labels2colors(tree$labels));
mColorhL=cbind(mColorhL, paste("MsAge7K_", str_pad(tree$labels, 2, pad="0"), sep="" ))
}
##  ..done.
##  ..done.
##  ..done.
##  ..done.
#pdf("Module_choices.pdf", height=10,width=25);
plotDendroAndColors(geneTreeA1, mColorh, paste("dpSplt =",0:3), main = "",dendroLabels=FALSE);

#dev.off()
dev.print(pdf,file="Module_choices.pdf", height=5,width=10)
## quartz_off_screen 
##                 2
#Picked deepSplit or dpSplit 2 which gives small number of large modules
modulesA1 = mColorh[,3] #color labels of modules
modulesA1L= mColorhL[,3] #numeric label of modules
#number of genes per module
#Principal component for visualization
PCs1A = moduleEigengenes(t(datExprA1g2), colors=modulesA1)
ME_1A = PCs1A$eigengenes
distPC1A = 1-abs(cor(ME_1A,use="p"))
distPC1A = ifelse(is.na(distPC1A), 0, distPC1A)
pcTree1A = hclust(as.dist(distPC1A),method="a")
MDS_1A = cmdscale(as.dist(distPC1A),2)
colorsA1 = names(table(modulesA1))

par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(pcTree1A, xlab="",ylab="",main="",sub="")

dev.print(pdf,file="ModuleEigengeneVisualizationsTree.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(MDS_1A, col= colorsA1, main="MDS plot", cex=2, pch=19)

dev.print(pdf,file="ModuleEigengeneVisualizationsMDS.pdf",height=5,width=5)
## quartz_off_screen 
##                 2
PCs1AL = moduleEigengenes(t(datExprA1g2), colors=modulesA1L)
ME_1AL = PCs1AL$eigengenes
distPC1AL = 1-abs(cor(ME_1AL,use="p"))
distPC1AL = ifelse(is.na(distPC1AL), 0, distPC1AL)
pcTree1AL = hclust(as.dist(distPC1AL),method="a")
MDS_1AL = cmdscale(as.dist(distPC1AL),2)
colorsA1L = names(table(modulesA1L))

par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
plot(pcTree1AL, xlab="",ylab="",main="",sub="")

dev.print(pdf,file="ModuleEigengeneVisualizationsTreelabels.pdf",height=5,width=10)
## quartz_off_screen 
##                 2
par(mfrow=c(1,1), mar=c(0, 3, 1, 1) + 0.1, cex=1)
#plot(MDS_1AL, col= colorsA1L, main="MDS plot", cex=2, pch=19)
#dev.print(pdf,file="ModuleEigengeneVisualizationsMDSlabels.pdf",height=5,width=5)

Step17A: Module membership (kME) for network comparison and identification of hub genes

#kME to measure of correlations between each gene and each ME
#for data A1
geneModuleMembership1 = signedKME(t(datExprA1g2), ME_1A)
colnames(geneModuleMembership1)=paste("PC",colorsA1,".cor",sep="");
MMPvalue1=corPvalueStudent(as.matrix(geneModuleMembership1),dim(datExprA1g2)[[2]]);
colnames(MMPvalue1)=paste("PC",colorsA1,".pval",sep="");
Gene = rownames(datExprA1g2)
kMEtable1 = cbind(Gene,Gene,modulesA1)
for (i in 1:length(colorsA1))
kMEtable1 = cbind(kMEtable1, geneModuleMembership1[,i], MMPvalue1[,i])
colnames(kMEtable1)=c("PSID","Gene","Module",sort(c(colnames(geneModuleMembership1),
colnames(MMPvalue1))))
write.csv(kMEtable1,"kMEtable1colors.csv")
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
#ref: modified from https://support.bioconductor.org/p/94701/
topGenesKME = NULL
for (c in 1:length(colorsA1)){
kMErank1 = rank(-geneModuleMembership1[,c])
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
#topGenesKME = cbind(topGenesKME,Gene[maxKMErank<=10])
topGenesKME = cbind(topGenesKME,Gene[order(kMErank1, decreasing=FALSE)][1:10])
}
colnames(topGenesKME) = colorsA1
topGenesKME
##       black      blue      brown      cyan      green      greenyellow
##  [1,] "FMOD"     "COBL"    "CAMSAP2"  "CCDC153" "TIMP2"    "DSP"      
##  [2,] "PTGDS"    "RIMS3"   "HTT"      "AK7"     "CADM1"    "LCT"      
##  [3,] "SLC22A6"  "LGI2"    "SMG1"     "TMEM212" "DCN"      "DGKH"     
##  [4,] "ITIH2"    "ZDHHC22" "ATP8A1"   "DNAH6"   "KCNG1"    "ZBTB20"   
##  [5,] "ALDH1A2"  "SATB1"   "VPS13D"   "CROCC2"  "NR2F2"    "PROX1"    
##  [6,] "VTN"      "CPLX3"   "TAOK1"    "DNAH10"  "EFNB2"    "ORAI2"    
##  [7,] "SLC6A13"  "NECAB1"  "KIAA1109" "DNAH12"  "DIO3"     "SEMA5A"   
##  [8,] "SERPING1" "OLFM2"   "WNK3"     "CCDC170" "TRHR"     "KIAA1211L"
##  [9,] "GJB2"     "KCNT1"   "HIPK1"    "FRMPD2"  "TMEM255A" "NPNT"     
## [10,] "MGP"      "CUX2"    "UBR4"     "ACAP1"   "ADGRA1"   "WIPF3"    
##       grey      grey60    lightcyan magenta   midnightblue pink     
##  [1,] "VSTM5"   "LENG8"   "GOLGA8J" "PLEKHB1" "ARHGAP39"   "CSMD1"  
##  [2,] "RREB1"   "ZCCHC7"  "GOLGA8M" "TMEM63A" "TSPAN18"    "NRG3"   
##  [3,] "ZC3H6"   "TRANK1"  "GOLGA6B" "ERMN"    "GOLM1"      "LINGO2" 
##  [4,] "MALT1"   "TIA1"    "GOLGA8G" "ERBIN"   "KSR1"       "LRP1B"  
##  [5,] "ADARB2"  "FLNB"    "GOLGA8Q" "SEC14L5" "KNDC1"      "NLGN1"  
##  [6,] "ZKSCAN2" "EML5"    "GOLGA8R" "ASPA"    "OLFML2B"    "FGF14"  
##  [7,] "C4B"     "SNRNP70" "GOLGA8T" "SEPT4"   "SH3BP5"     "OPCML"  
##  [8,] "C4A"     "GUF1"    "GOLGA6A" "GALNT6"  "THSD4"      "KCNIP4" 
##  [9,] "PLEC"    "FAM228B" "GOLGA8A" "BCAS1"   "FKBP5"      "FAM155A"
## [10,] "FLNB"    "NKTR"    "GOLGA8S" "GATM"    "SIPA1L2"    "LRRTM4" 
##       purple   red        salmon   tan      turquoise yellow    
##  [1,] "ATP5ME" "ISL1"     "ARC"    "RPS18"  "TSPAN2"  "C4B"     
##  [2,] "TMA7"   "ZIC1"     "FOS"    "RPL12"  "CNP"     "C4A"     
##  [3,] "RPL41"  "PRDM12"   "JUNB"   "RPL14"  "GSN"     "CTSS"    
##  [4,] "RPL37A" "SCN5A"    "TIPARP" "RPL11"  "UGT8"    "LGALS3BP"
##  [5,] "NDUFA5" "PLD5"     "NR4A1"  "RPL17"  "PLP1"    "APOD"    
##  [6,] "RPL28"  "C1orf115" "EGR1"   "RPS6"   "ADAMTS4" "C1QC"    
##  [7,] "RPL38"  "SP8"      "EGR2"   "RPS16"  "MAL"     "TREM2"   
##  [8,] "ATP5MD" "ANO1"     "SIK1"   "NDUFB4" "CLDN11"  "C1QB"    
##  [9,] "NDUFB3" "CACNA2D2" "SIK1B"  "RPSA"   "SIRT2"   "MPEG1"   
## [10,] "RPL23"  "COL6A3"   "DUSP1"  "RPL18A" "DDR1"    "C1QA"
write.csv(topGenesKME,"A1_HubGenestopGenesKMEcolorsRankSorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
topGenesKME = NULL
for (c in 1:length(colorsA1)){
kMErank1 = rank(-geneModuleMembership1[,c])
maxKMErank = rank(apply(cbind(kMErank1),1,max))
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
topGenesKME = cbind(topGenesKME,Gene[maxKMErank<=10])
}
colnames(topGenesKME) = colorsA1
topGenesKME
##       black      blue      brown      cyan      green      greenyellow
##  [1,] "FMOD"     "CPLX3"   "WNK3"     "TMEM212" "DIO3"     "DSP"      
##  [2,] "PTGDS"    "ZDHHC22" "TAOK1"    "CROCC2"  "TRHR"     "LCT"      
##  [3,] "SLC22A6"  "COBL"    "HIPK1"    "CCDC153" "DCN"      "NPNT"     
##  [4,] "ALDH1A2"  "NECAB1"  "HTT"      "DNAH12"  "NR2F2"    "SEMA5A"   
##  [5,] "SLC6A13"  "RIMS3"   "ATP8A1"   "DNAH10"  "TMEM255A" "PROX1"    
##  [6,] "ITIH2"    "CUX2"    "KIAA1109" "AK7"     "KCNG1"    "WIPF3"    
##  [7,] "MGP"      "OLFM2"   "VPS13D"   "FRMPD2"  "EFNB2"    "DGKH"     
##  [8,] "GJB2"     "LGI2"    "CAMSAP2"  "ACAP1"   "ADGRA1"   "ZBTB20"   
##  [9,] "SERPING1" "KCNT1"   "UBR4"     "DNAH6"   "TIMP2"    "ORAI2"    
## [10,] "VTN"      "SATB1"   "SMG1"     "CCDC170" "CADM1"    "KIAA1211L"
##       grey      grey60    lightcyan magenta   midnightblue pink     
##  [1,] "C4B"     "FAM228B" "GOLGA8J" "GALNT6"  "TSPAN18"    "KCNIP4" 
##  [2,] "C4A"     "TIA1"    "GOLGA8M" "ERMN"    "FKBP5"      "OPCML"  
##  [3,] "RREB1"   "LENG8"   "GOLGA6B" "SEC14L5" "OLFML2B"    "LRRTM4" 
##  [4,] "ZC3H6"   "ZCCHC7"  "GOLGA8G" "ASPA"    "THSD4"      "LINGO2" 
##  [5,] "VSTM5"   "FLNB"    "GOLGA8Q" "TMEM63A" "KNDC1"      "FGF14"  
##  [6,] "ZKSCAN2" "NKTR"    "GOLGA8R" "GATM"    "GOLM1"      "NRG3"   
##  [7,] "ADARB2"  "GUF1"    "GOLGA8T" "BCAS1"   "KSR1"       "CSMD1"  
##  [8,] "FLNB"    "EML5"    "GOLGA6A" "ERBIN"   "SIPA1L2"    "NLGN1"  
##  [9,] "MALT1"   "TRANK1"  "GOLGA8A" "SEPT4"   "SH3BP5"     "LRP1B"  
## [10,] "PLEC"    "SNRNP70" "GOLGA8S" "PLEKHB1" "ARHGAP39"   "FAM155A"
##       purple   red        salmon   tan      turquoise yellow    
##  [1,] "ATP5ME" "ZIC1"     "FOS"    "RPL12"  "GSN"     "C4B"     
##  [2,] "ATP5MD" "COL6A3"   "EGR2"   "RPL11"  "CLDN11"  "C4A"     
##  [3,] "RPL38"  "ISL1"     "ARC"    "NDUFB4" "UGT8"    "LGALS3BP"
##  [4,] "RPL41"  "PRDM12"   "DUSP1"  "RPS18"  "MAL"     "APOD"    
##  [5,] "RPL37A" "SCN5A"    "SIK1"   "RPL18A" "PLP1"    "TREM2"   
##  [6,] "RPL28"  "PLD5"     "SIK1B"  "RPL17"  "TSPAN2"  "CTSS"    
##  [7,] "NDUFB3" "C1orf115" "JUNB"   "RPS16"  "ADAMTS4" "C1QA"    
##  [8,] "TMA7"   "SP8"      "EGR1"   "RPS6"   "CNP"     "MPEG1"   
##  [9,] "RPL23"  "ANO1"     "NR4A1"  "RPL14"  "DDR1"    "C1QC"    
## [10,] "NDUFA5" "CACNA2D2" "TIPARP" "RPSA"   "SIRT2"   "C1QB"
write.csv(topGenesKME,"A1_HubGenestopGenesKMEcolorsRankUnsorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.

Same as above for labeled modules

#kME to measure of correlations between each gene and each ME
#for data A1
geneModuleMembership1L = signedKME(t(datExprA1g2), ME_1AL)
colnames(geneModuleMembership1L)=paste("PC",colorsA1L,".cor",sep="");
MMPvalue1L=corPvalueStudent(as.matrix(geneModuleMembership1L),dim(datExprA1g2)[[2]]);
colnames(MMPvalue1L)=paste("PC",colorsA1L,".pval",sep="");
GeneL = rownames(datExprA1g2)
kMEtable1L = cbind(GeneL,GeneL,modulesA1L)
for (i in 1:length(colorsA1L))
kMEtable1L = cbind(kMEtable1L, geneModuleMembership1L[,i], MMPvalue1L[,i])
colnames(kMEtable1L)=c("PSID","Gene","Module",sort(c(colnames(geneModuleMembership1L),
colnames(MMPvalue1L))))
write.csv(kMEtable1L,"kMEtable1labels.csv")
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
#ref: modified from https://support.bioconductor.org/p/94701/
topGenesKMEL = NULL
for (c in 1:length(colorsA1L)){
kMErank1L = rank(-geneModuleMembership1L[,c])
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
#topGenesKMEL = cbind(topGenesKMEL,GeneL[maxKMErankL<=10])
topGenesKMEL = cbind(topGenesKMEL,GeneL[order(kMErank1L, decreasing=FALSE)][1:10])
}
colnames(topGenesKMEL) = colorsA1L
topGenesKMEL
##       MsAge7K_00 MsAge7K_01 MsAge7K_02 MsAge7K_03 MsAge7K_04 MsAge7K_05
##  [1,] "VSTM5"    "TSPAN2"   "COBL"     "CAMSAP2"  "C4B"      "TIMP2"   
##  [2,] "RREB1"    "CNP"      "RIMS3"    "HTT"      "C4A"      "CADM1"   
##  [3,] "ZC3H6"    "GSN"      "LGI2"     "SMG1"     "CTSS"     "DCN"     
##  [4,] "MALT1"    "UGT8"     "ZDHHC22"  "ATP8A1"   "LGALS3BP" "KCNG1"   
##  [5,] "ADARB2"   "PLP1"     "SATB1"    "VPS13D"   "APOD"     "NR2F2"   
##  [6,] "ZKSCAN2"  "ADAMTS4"  "CPLX3"    "TAOK1"    "C1QC"     "EFNB2"   
##  [7,] "C4B"      "MAL"      "NECAB1"   "KIAA1109" "TREM2"    "DIO3"    
##  [8,] "C4A"      "CLDN11"   "OLFM2"    "WNK3"     "C1QB"     "TRHR"    
##  [9,] "PLEC"     "SIRT2"    "KCNT1"    "HIPK1"    "MPEG1"    "TMEM255A"
## [10,] "FLNB"     "DDR1"     "CUX2"     "UBR4"     "C1QA"     "ADGRA1"  
##       MsAge7K_06 MsAge7K_07 MsAge7K_08 MsAge7K_09 MsAge7K_10 MsAge7K_11 
##  [1,] "ISL1"     "FMOD"     "CSMD1"    "PLEKHB1"  "ATP5ME"   "DSP"      
##  [2,] "ZIC1"     "PTGDS"    "NRG3"     "TMEM63A"  "TMA7"     "LCT"      
##  [3,] "PRDM12"   "SLC22A6"  "LINGO2"   "ERMN"     "RPL41"    "DGKH"     
##  [4,] "SCN5A"    "ITIH2"    "LRP1B"    "ERBIN"    "RPL37A"   "ZBTB20"   
##  [5,] "PLD5"     "ALDH1A2"  "NLGN1"    "SEC14L5"  "NDUFA5"   "PROX1"    
##  [6,] "C1orf115" "VTN"      "FGF14"    "ASPA"     "RPL28"    "ORAI2"    
##  [7,] "SP8"      "SLC6A13"  "OPCML"    "SEPT4"    "RPL38"    "SEMA5A"   
##  [8,] "ANO1"     "SERPING1" "KCNIP4"   "GALNT6"   "ATP5MD"   "KIAA1211L"
##  [9,] "CACNA2D2" "GJB2"     "FAM155A"  "BCAS1"    "NDUFB3"   "NPNT"     
## [10,] "COL6A3"   "MGP"      "LRRTM4"   "GATM"     "RPL23"    "WIPF3"    
##       MsAge7K_12 MsAge7K_13 MsAge7K_14 MsAge7K_15 MsAge7K_16 MsAge7K_17
##  [1,] "RPS18"    "ARC"      "CCDC153"  "ARHGAP39" "GOLGA8J"  "LENG8"   
##  [2,] "RPL12"    "FOS"      "AK7"      "TSPAN18"  "GOLGA8M"  "ZCCHC7"  
##  [3,] "RPL14"    "JUNB"     "TMEM212"  "GOLM1"    "GOLGA6B"  "TRANK1"  
##  [4,] "RPL11"    "TIPARP"   "DNAH6"    "KSR1"     "GOLGA8G"  "TIA1"    
##  [5,] "RPL17"    "NR4A1"    "CROCC2"   "KNDC1"    "GOLGA8Q"  "FLNB"    
##  [6,] "RPS6"     "EGR1"     "DNAH10"   "OLFML2B"  "GOLGA8R"  "EML5"    
##  [7,] "RPS16"    "EGR2"     "DNAH12"   "SH3BP5"   "GOLGA8T"  "SNRNP70" 
##  [8,] "NDUFB4"   "SIK1"     "CCDC170"  "THSD4"    "GOLGA6A"  "GUF1"    
##  [9,] "RPSA"     "SIK1B"    "FRMPD2"   "FKBP5"    "GOLGA8A"  "FAM228B" 
## [10,] "RPL18A"   "DUSP1"    "ACAP1"    "SIPA1L2"  "GOLGA8S"  "NKTR"
write.csv(topGenesKMEL,"A1_HubGenestopGenesKMElabelsRankSorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.
#Hub genes are genes that show significant correlation with MEs and high within-module connectivity 
#kME values are used to compare data A1 to determine which genes have extremely high kME values in networks
#and are thus genes are hubs in both networks 
topGenesKMEL = NULL
for (c in 1:length(colorsA1L)){
kMErank1L = rank(-geneModuleMembership1L[,c])
maxKMErankL = rank(apply(cbind(kMErank1L),1,max))
#maxKMErank = rank(apply(cbind(kMErank1,kMErank2+.00001),1,max))
topGenesKMEL = cbind(topGenesKMEL,GeneL[maxKMErankL<=10])
}
colnames(topGenesKMEL) = colorsA1L
topGenesKMEL
##       MsAge7K_00 MsAge7K_01 MsAge7K_02 MsAge7K_03 MsAge7K_04 MsAge7K_05
##  [1,] "C4B"      "GSN"      "CPLX3"    "WNK3"     "C4B"      "DIO3"    
##  [2,] "C4A"      "CLDN11"   "ZDHHC22"  "TAOK1"    "C4A"      "TRHR"    
##  [3,] "RREB1"    "UGT8"     "COBL"     "HIPK1"    "LGALS3BP" "DCN"     
##  [4,] "ZC3H6"    "MAL"      "NECAB1"   "HTT"      "APOD"     "NR2F2"   
##  [5,] "VSTM5"    "PLP1"     "RIMS3"    "ATP8A1"   "TREM2"    "TMEM255A"
##  [6,] "ZKSCAN2"  "TSPAN2"   "CUX2"     "KIAA1109" "CTSS"     "KCNG1"   
##  [7,] "ADARB2"   "ADAMTS4"  "OLFM2"    "VPS13D"   "C1QA"     "EFNB2"   
##  [8,] "FLNB"     "CNP"      "LGI2"     "CAMSAP2"  "MPEG1"    "ADGRA1"  
##  [9,] "MALT1"    "DDR1"     "KCNT1"    "UBR4"     "C1QC"     "TIMP2"   
## [10,] "PLEC"     "SIRT2"    "SATB1"    "SMG1"     "C1QB"     "CADM1"   
##       MsAge7K_06 MsAge7K_07 MsAge7K_08 MsAge7K_09 MsAge7K_10 MsAge7K_11 
##  [1,] "ZIC1"     "FMOD"     "KCNIP4"   "GALNT6"   "ATP5ME"   "DSP"      
##  [2,] "COL6A3"   "PTGDS"    "OPCML"    "ERMN"     "ATP5MD"   "LCT"      
##  [3,] "ISL1"     "SLC22A6"  "LRRTM4"   "SEC14L5"  "RPL38"    "NPNT"     
##  [4,] "PRDM12"   "ALDH1A2"  "LINGO2"   "ASPA"     "RPL41"    "SEMA5A"   
##  [5,] "SCN5A"    "SLC6A13"  "FGF14"    "TMEM63A"  "RPL37A"   "PROX1"    
##  [6,] "PLD5"     "ITIH2"    "NRG3"     "GATM"     "RPL28"    "WIPF3"    
##  [7,] "C1orf115" "MGP"      "CSMD1"    "BCAS1"    "NDUFB3"   "DGKH"     
##  [8,] "SP8"      "GJB2"     "NLGN1"    "ERBIN"    "TMA7"     "ZBTB20"   
##  [9,] "ANO1"     "SERPING1" "LRP1B"    "SEPT4"    "RPL23"    "ORAI2"    
## [10,] "CACNA2D2" "VTN"      "FAM155A"  "PLEKHB1"  "NDUFA5"   "KIAA1211L"
##       MsAge7K_12 MsAge7K_13 MsAge7K_14 MsAge7K_15 MsAge7K_16 MsAge7K_17
##  [1,] "RPL12"    "FOS"      "TMEM212"  "TSPAN18"  "GOLGA8J"  "FAM228B" 
##  [2,] "RPL11"    "EGR2"     "CROCC2"   "FKBP5"    "GOLGA8M"  "TIA1"    
##  [3,] "NDUFB4"   "ARC"      "CCDC153"  "OLFML2B"  "GOLGA6B"  "LENG8"   
##  [4,] "RPS18"    "DUSP1"    "DNAH12"   "THSD4"    "GOLGA8G"  "ZCCHC7"  
##  [5,] "RPL18A"   "SIK1"     "DNAH10"   "KNDC1"    "GOLGA8Q"  "FLNB"    
##  [6,] "RPL17"    "SIK1B"    "AK7"      "GOLM1"    "GOLGA8R"  "NKTR"    
##  [7,] "RPS16"    "JUNB"     "FRMPD2"   "KSR1"     "GOLGA8T"  "GUF1"    
##  [8,] "RPS6"     "EGR1"     "ACAP1"    "SIPA1L2"  "GOLGA6A"  "EML5"    
##  [9,] "RPL14"    "NR4A1"    "DNAH6"    "SH3BP5"   "GOLGA8A"  "TRANK1"  
## [10,] "RPSA"     "TIPARP"   "CCDC170"  "ARHGAP39" "GOLGA8S"  "SNRNP70"
write.csv(topGenesKMEL,"A1_HubGenestopGenesKMElabelsRankUnsorted.csv")
#These genes or probes represent the top 10 hub genes for modules in data A1 networks.

Step17B: Intramodular connectivity (kIM or kWithin) and whole network connectivity (kTotal) for network comparison

#Ref: WGCNA tutorial Steve Horvath Peter Langfelder 2011
#intramodular connectivity is mathematically the sum of module edge weights or "degree" of connectivity between a given node or gene and other genes or nodes within the module. 
#For colors
IntraModularConnectivity1=intramodularConnectivity(adjacencyA1,modulesA1)
#We get the color labels for the genes 
GeneModule1=kMEtable1[,c("PSID","Gene","Module")]
rownames(GeneModule1)=rownames(kMEtable1)
#merge the intramodular connectivity dataframe with module names by rownames that are gene symbols
IntraModularConnectivity2=merge(GeneModule1,IntraModularConnectivity1, by="row.names")
#drop the extra row.name column
IntraModularConnectivity3=IntraModularConnectivity2[,-c(1)]
DT::datatable(head(IntraModularConnectivity3))
#export kWithin and kTotal for all genes 
write.csv(IntraModularConnectivity1,"kWithintable1colors.csv")

Same as above for labeled modules

#Ref: WGCNA tutorial Steve Horvath Peter Langfelder 2011
#intramodular connectivity is mathematically the sum of module edge weights or "degree" of connectivity between a given node or gene and other genes or nodes within the module. 
#For colors
IntraModularConnectivity1L=intramodularConnectivity(adjacencyA1,modulesA1L)
#We get the number labels for the genes 
GeneModule1L=kMEtable1L[,c("PSID","Gene","Module")]
rownames(GeneModule1L)=rownames(kMEtable1L)
#merge the intramodular connectivity dataframe with module names by rownames that are gene symbols
IntraModularConnectivity2L=merge(GeneModule1L,IntraModularConnectivity1L, by="row.names")
#drop the extra row.name column
IntraModularConnectivity3L=IntraModularConnectivity2L[,-c(1)]
DT::datatable(head(IntraModularConnectivity3L))
#export kWithin and kTotal for all genes 
write.csv(IntraModularConnectivity1L,"kWithintable1labels.csv")

Step18: Generating files for doing GO, KEEG and Reactome annotation of genes in a given module

#Make a folder first called 'Module_GeneskMEHubs' change full path below as per your system
dir.create("./Module_GeneskMEHubs")
folder = "Module_GeneskMEHubs/"
for (c in colorsA1){
fn = paste(folder, c, ".txt",sep="");
write.geneList(Gene[modulesA1==c], fn)
};
write(Gene,paste(folder,"all.txt",sep=""))
#The folder should now have files titled "<color>.txt" that contain the gene names of every gene in that module,as well as a file titled "all.txt" that has every gene in your data set.
#make a summary file of all the genes in modules, similar to kME files but with only genes and module names as above
geneListsModuleSummary=cbind(Gene,modulesA1)
write.csv(geneListsModuleSummary,"geneListsModuleSummarycolors.csv")

Same as above for labeled modules

#Make a folder first called 'Module_GeneskMEHubs' change full path below as per your system
folder = "Module_GeneskMEHubs/"
for (c in colorsA1L){
fn = paste(folder, c, ".txt",sep="");
write.geneList(GeneL[modulesA1L==c], fn)
};
write(GeneL,paste(folder,"allL.txt",sep=""))
#The folder should now have files titled "<color>.txt" that contain the gene names of every gene in that module,as well as a file titled "all.txt" that has every gene in your data set.
#make a summary file of all the genes in modules, similar to kME files but with only genes and module names as above
geneListsModuleSummaryL=cbind(GeneL,modulesA1L)
write.csv(geneListsModuleSummaryL,"geneListsModuleSummarylabels.csv")

Step19: Visualization of trait module relatonships for data A1

In this step we use the coded metadata files

Relating modules to physiological traits for A1

# For data A1
# Define numbers of genes and samples
nGenesA1 = nrow(datExprA1g2)
nSamplesA1 = ncol(datExprA1g2)
# Recalculate MEs with color labels
MEs0A1= moduleEigengenes(t(datExprA1g2),modulesA1)$eigengenes
MEsA1= orderMEs(MEs0A1)
modTraitCorA1 = cor(MEsA1, metadataA1g_coded, use = "p")
modTraitPA1 = corPvalueStudent(modTraitCorA1, nSamplesA1)
textMatrixA1 = paste(signif(modTraitCorA1, 2), "\n(",
signif(modTraitPA1, 1), ")", sep = "")
dim(textMatrixA1) = dim(modTraitCorA1)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = modTraitCorA1, xLabels = names(metadataA1g_coded),
yLabels = names(MEsA1), ySymbols = names(MEsA1), 
colorLabels =FALSE,colors=greenWhiteRed(50),textMatrix=textMatrixA1,
setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1),
main = paste("A1_Module-trait relationships colors"))

dev.print(pdf,"A1_relating modules to trait colors.pdf", width=5, height=5)
## quartz_off_screen 
##                 2
#This is for color module trait table
colnames(modTraitPA1) = paste("p.value.", colnames(modTraitCorA1), sep="");
out3<-cbind(Module=rownames(modTraitCorA1), modTraitCorA1, modTraitPA1)
dim(out3)
## [1] 18  9
write.table(out3, "A1_relating modules to trait colors.csv", sep=",",row.names=F)

Same as above for labeled modules

Relating modules to physiological traits for A1

# For data A1
# Define numbers of genes and samples
nGenesA1L = nrow(datExprA1g2)
nSamplesA1L = ncol(datExprA1g2)
# Recalculate MEs with color labels
MEs0A1L= moduleEigengenes(t(datExprA1g2),modulesA1L)$eigengenes
MEsA1L= orderMEs(MEs0A1L)
modTraitCorA1L = cor(MEsA1L, metadataA1g_coded, use = "p")
modTraitPA1L = corPvalueStudent(modTraitCorA1L, nSamplesA1L)
textMatrixA1L = paste(signif(modTraitCorA1L, 2), "\n(",
signif(modTraitPA1L, 1), ")", sep = "")
dim(textMatrixA1L) = dim(modTraitCorA1L)
par(mar = c(6, 8.5, 3, 3))
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = modTraitCorA1L, xLabels = names(metadataA1g_coded),
yLabels = names(MEsA1L), ySymbols = names(MEsA1L), 
colorLabels =FALSE,colors=greenWhiteRed(50),textMatrix=textMatrixA1L,
setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1),
main = paste("A1_Module-trait relationships labels"))

dev.print(pdf,"A1_relating modules to trait labels.pdf", width=5, height=5)
## quartz_off_screen 
##                 2
#This is for label module trait table
colnames(modTraitPA1L) = paste("p.value.", colnames(modTraitCorA1L), sep="");
out3L<-cbind(Module=rownames(modTraitCorA1L), modTraitCorA1L, modTraitPA1L)
dim(out3L)
## [1] 18  9
write.table(out3L, "A1_relating modules to trait labels.csv", sep=",",row.names=F)
save.image(file="temp.RData")
rm(list=ls())
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  4227193 225.8   13091016  699.2         NA  13091016  699.2
## Vcells 10438201  79.7  365889846 2791.6     102400 457362243 3489.4
#To reload uncomment code below
#load(file="temp.RData")

Step20: Additional step for conversion of gene symbols mouse to Human gene symbols

load(file="temp.RData")
library(Rcpp)
library(stringi)
library(biomaRt)
#The useMart() function can now be used to connect to a specified BioMart database, this must be a valid name given by listMarts(). In the next example we choose to query the Ensembl BioMart database.
listMarts()
##                biomart               version
## 1 ENSEMBL_MART_ENSEMBL      Ensembl Genes 94
## 2   ENSEMBL_MART_MOUSE      Mouse strains 94
## 3     ENSEMBL_MART_SNP  Ensembl Variation 94
## 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 94
ensembl=useMart("ensembl")
listDatasets(ensembl)
##                            dataset
## 1         acalliptera_gene_ensembl
## 2       acarolinensis_gene_ensembl
## 3        acitrinellus_gene_ensembl
## 4        amelanoleuca_gene_ensembl
## 5          amexicanus_gene_ensembl
## 6          anancymaae_gene_ensembl
## 7          aocellaris_gene_ensembl
## 8            apercula_gene_ensembl
## 9      aplatyrhynchos_gene_ensembl
## 10      apolyacanthus_gene_ensembl
## 11       atestudineus_gene_ensembl
## 12            btaurus_gene_ensembl
## 13            caperea_gene_ensembl
## 14              catys_gene_ensembl
## 15         ccapucinus_gene_ensembl
## 16         cchok1gshd_gene_ensembl
## 17            ccrigri_gene_ensembl
## 18           celegans_gene_ensembl
## 19        cfamiliaris_gene_ensembl
## 20            chircus_gene_ensembl
## 21         choffmanni_gene_ensembl
## 22      cintestinalis_gene_ensembl
## 23           cjacchus_gene_ensembl
## 24          clanigera_gene_ensembl
## 25         cpalliatus_gene_ensembl
## 26         cporcellus_gene_ensembl
## 27           csabaeus_gene_ensembl
## 28          csavignyi_gene_ensembl
## 29        csemilaevis_gene_ensembl
## 30          csyrichta_gene_ensembl
## 31        cvariegatus_gene_ensembl
## 32      dmelanogaster_gene_ensembl
## 33      dnovemcinctus_gene_ensembl
## 34             dordii_gene_ensembl
## 35             drerio_gene_ensembl
## 36           eburgeri_gene_ensembl
## 37          ecaballus_gene_ensembl
## 38         eeuropaeus_gene_ensembl
## 39            elucius_gene_ensembl
## 40          etelfairi_gene_ensembl
## 41        falbicollis_gene_ensembl
## 42             fcatus_gene_ensembl
## 43        fdamarensis_gene_ensembl
## 44      fheteroclitus_gene_ensembl
## 45         gaculeatus_gene_ensembl
## 46           gaffinis_gene_ensembl
## 47            ggallus_gene_ensembl
## 48           ggorilla_gene_ensembl
## 49            gmorhua_gene_ensembl
## 50           hburtoni_gene_ensembl
## 51             hcomes_gene_ensembl
## 52            hfemale_gene_ensembl
## 53              hmale_gene_ensembl
## 54           hsapiens_gene_ensembl
## 55         ipunctatus_gene_ensembl
## 56  itridecemlineatus_gene_ensembl
## 57           jjaculus_gene_ensembl
## 58        kmarmoratus_gene_ensembl
## 59          lafricana_gene_ensembl
## 60          lbergylta_gene_ensembl
## 61         lchalumnae_gene_ensembl
## 62          loculatus_gene_ensembl
## 63             malbus_gene_ensembl
## 64           marmatus_gene_ensembl
## 65           mauratus_gene_ensembl
## 66            mcaroli_gene_ensembl
## 67         mdomestica_gene_ensembl
## 68      mfascicularis_gene_ensembl
## 69              mfuro_gene_ensembl
## 70         mgallopavo_gene_ensembl
## 71       mleucophaeus_gene_ensembl
## 72         mlucifugus_gene_ensembl
## 73              mmola_gene_ensembl
## 74           mmulatta_gene_ensembl
## 75           mmurinus_gene_ensembl
## 76          mmusculus_gene_ensembl
## 77        mnemestrina_gene_ensembl
## 78       mochrogaster_gene_ensembl
## 79            mpahari_gene_ensembl
## 80           mspretus_gene_ensembl
## 81             mzebra_gene_ensembl
## 82         nbrichardi_gene_ensembl
## 83           neugenii_gene_ensembl
## 84            ngalili_gene_ensembl
## 85        nleucogenys_gene_ensembl
## 86          oanatinus_gene_ensembl
## 87             oaries_gene_ensembl
## 88         ocuniculus_gene_ensembl
## 89             odegus_gene_ensembl
## 90         ogarnettii_gene_ensembl
## 91               ohni_gene_ensembl
## 92              ohsok_gene_ensembl
## 93           olatipes_gene_ensembl
## 94        omelastigma_gene_ensembl
## 95         oniloticus_gene_ensembl
## 96          oprinceps_gene_ensembl
## 97            pabelii_gene_ensembl
## 98           paltaica_gene_ensembl
## 99            panubis_gene_ensembl
## 100          pbairdii_gene_ensembl
## 101         pcapensis_gene_ensembl
## 102        pcoquereli_gene_ensembl
## 103          pformosa_gene_ensembl
## 104       pkingsleyae_gene_ensembl
## 105        platipinna_gene_ensembl
## 106   pmagnuspinnatus_gene_ensembl
## 107          pmarinus_gene_ensembl
## 108         pmexicana_gene_ensembl
## 109        pnattereri_gene_ensembl
## 110         pnyererei_gene_ensembl
## 111         ppaniscus_gene_ensembl
## 112           ppardus_gene_ensembl
## 113       preticulata_gene_ensembl
## 114         psinensis_gene_ensembl
## 115      ptroglodytes_gene_ensembl
## 116         pvampyrus_gene_ensembl
## 117            rbieti_gene_ensembl
## 118       rnorvegicus_gene_ensembl
## 119        rroxellana_gene_ensembl
## 120          saraneus_gene_ensembl
## 121      sboliviensis_gene_ensembl
## 122       scerevisiae_gene_ensembl
## 123         sdorsalis_gene_ensembl
## 124         sdumerili_gene_ensembl
## 125         sformosus_gene_ensembl
## 126         sharrisii_gene_ensembl
## 127          smaximus_gene_ensembl
## 128         spartitus_gene_ensembl
## 129           sscrofa_gene_ensembl
## 130        tbelangeri_gene_ensembl
## 131          tguttata_gene_ensembl
## 132     tnigroviridis_gene_ensembl
## 133         trubripes_gene_ensembl
## 134        ttruncatus_gene_ensembl
## 135            vpacos_gene_ensembl
## 136       xcouchianus_gene_ensembl
## 137        xmaculatus_gene_ensembl
## 138       xtropicalis_gene_ensembl
##                                                      description
## 1                               Eastern happy genes (fAstCal1.2)
## 2                                 Anole lizard genes (AnoCar2.0)
## 3                                 Midas cichlid genes (Midas_v5)
## 4                                          Panda genes (ailMel1)
## 5                       Cave fish genes (Astyanax_mexicanus-2.0)
## 6                             Ma's night monkey genes (Anan_2.0)
## 7                            Clown anemonefish genes (AmpOce1.0)
## 8                               Orange clownfish genes (Nemo_v1)
## 9                                      Duck genes (BGI_duck_1.0)
## 10                             Spiny chromis genes (ASM210954v1)
## 11                             Climbing perch genes (fAnaTes1.1)
## 12                                            Cow genes (UMD3.1)
## 13                         Brazilian guinea pig genes (CavAp1.0)
## 14                               Sooty mangabey genes (Caty_1.0)
## 15                           Capuchin genes (Cebus_imitator-1.0)
## 16                  Chinese hamster CHOK1GS genes (CHOK1GS_HDv1)
## 17                     Chinese hamster CriGri genes (CriGri_1.0)
## 18                       Caenorhabditis elegans genes (WBcel235)
## 19                                         Dog genes (CanFam3.1)
## 20                                             Goat genes (ARS1)
## 21                                         Sloth genes (choHof1)
## 22                                     C.intestinalis genes (KH)
## 23                                  Marmoset genes (ASM275486v1)
## 24                      Long-tailed chinchilla genes (ChiLan1.0)
## 25                            Angola colobus genes (Cang.pa_1.0)
## 26                                  Guinea Pig genes (Cavpor3.0)
## 27                                  Vervet-AGM genes (ChlSab1.1)
## 28                                   C.savignyi genes (CSAV 2.0)
## 29                                  Tongue sole genes (Cse_v1.0)
## 30                        Tarsier genes (Tarsius_syrichta-2.0.1)
## 31                    Sheepshead minnow genes (C_variegatus-1.0)
## 32                                        Fruitfly genes (BDGP6)
## 33                                   Armadillo genes (Dasnov3.0)
## 34                                 Kangaroo rat genes (Dord_2.0)
## 35                                      Zebrafish genes (GRCz11)
## 36                                  Hagfish genes (Eburgeri_3.2)
## 37                                       Horse genes (Equ Cab 2)
## 38                                      Hedgehog genes (eriEur1)
## 39                                 Northern pike genes (Eluc_V3)
## 40                         Lesser hedgehog tenrec genes (TENREC)
## 41                                 Flycatcher genes (FicAlb_1.4)
## 42                                   Cat genes (Felis_catus_9.0)
## 43                              Damara mole rat genes (DMR_v1.0)
## 44                 Mummichog genes (Fundulus_heteroclitus-3.0.2)
## 45                                  Stickleback genes (BROAD S1)
## 46                      Western mosquitofish genes (ASM309773v1)
## 47                             Chicken genes (Gallus_gallus-5.0)
## 48                                       Gorilla genes (gorGor4)
## 49                                           Cod genes (gadMor1)
## 50                       Burton's mouthbrooder genes (AstBur1.0)
## 51                    Tiger tail seahorse genes (H_comes_QL1_v1)
## 52               Naked mole-rat female genes (HetGla_female_1.0)
## 53                        Naked mole-rat male genes (HetGla_1.0)
## 54                                      Human genes (GRCh38.p12)
## 55                            Channel catfish genes (IpCoco_1.2)
## 56                                    Squirrel genes (SpeTri2.0)
## 57                      Lesser Egyptian jerboa genes (JacJac1.0)
## 58                          Mangrove rivulus genes (ASM164957v1)
## 59                                    Elephant genes (Loxafr3.0)
## 60                              Ballan wrasse genes (BallGen_V1)
## 61                                    Coelacanth genes (LatCha1)
## 62                                   Spotted gar genes (LepOcu1)
## 63                                 Swamp eel genes (M_albus_1.0)
## 64                                Zig-zag eel genes (fMasArm1.1)
## 65                              Golden Hamster genes (MesAur1.0)
## 66                          Ryukyu mouse genes (CAROLI_EIJ_v1.1)
## 67                                       Opossum genes (monDom5)
## 68           Crab-eating macaque genes (Macaca_fascicularis_5.0)
## 69                                   Ferret genes (MusPutFur1.0)
## 70                                    Turkey genes (Turkey_2.01)
## 71                                     Drill genes (Mleu.le_1.0)
## 72                                    Microbat genes (Myoluc2.0)
## 73                             Ocean sunfish genes (ASM169857v1)
## 74                                    Macaque genes (Mmul_8.0.1)
## 75                                  Mouse Lemur genes (Mmur_3.0)
## 76                                       Mouse genes (GRCm38.p6)
## 77                           Pig-tailed macaque genes (Mnem_1.0)
## 78                                Prairie vole genes (MicOch1.0)
## 79                           Shrew mouse genes (PAHARI_EIJ_v1.1)
## 80                           Algerian mouse genes (SPRET_EiJ_v1)
## 81                             Zebra mbuna genes (M_zebra_UMD2a)
## 82                            Lyretail cichlid genes (NeoBri1.0)
## 83                                      Wallaby genes (Meug_1.0)
## 84  Upper Galilee mountains blind mole rat genes (S.galili_v1.0)
## 85                                       Gibbon genes (Nleu_3.0)
## 86                                        Platypus genes (OANA5)
## 87                                        Sheep genes (Oar_v3.1)
## 88                                      Rabbit genes (OryCun2.0)
## 89                                        Degu genes (OctDeg1.0)
## 90                                      Bushbaby genes (OtoGar3)
## 91                       Japanese Medaka HNI genes (ASM223471v1)
## 92                      Japanese Medaka HSOK genes (ASM223469v1)
## 93                      Japanese Medaka HdrR genes (ASM223467v1)
## 94                            Indian medaka genes (Om_v0.7.RACA)
## 95                                     Tilapia genes (Orenil1.0)
## 96                                    Pika genes (OchPri2.0-Ens)
## 97                                       Orangutan genes (PPYG2)
## 98                                       Tiger genes (PanTig1.0)
## 99                                 Olive baboon genes (Panu_3.0)
## 100                Northern American deer mouse genes (Pman_1.0)
## 101                                        Hyrax genes (proCap1)
## 102                           Coquerel's sifaka genes (Pcoq_1.0)
## 103                  Amazon molly genes (Poecilia_formosa-5.1.2)
## 104                  Paramormyrops kingsleyae genes (PKINGS_0.1)
## 105                        Sailfin molly genes (P_latipinna-1.0)
## 106                          Big-finned mudskipper genes (PM.fa)
## 107                                 Lamprey genes (Pmarinus_7.0)
## 108                        Shortfin molly genes (P_mexicana-1.0)
## 109      Red-bellied piranha genes (Pygocentrus_nattereri-1.0.2)
## 110                        Pundamilia nyererei genes (PunNye1.0)
## 111                                     Bonobo genes (panpan1.1)
## 112                                    Leopard genes (PanPar1.0)
## 113                            Guppy genes (Guppy_female_1.0_MT)
## 114                  Chinese softshell turtle genes (PelSin_1.0)
## 115                               Chimpanzee genes (Pan_tro_3.0)
## 116                                      Megabat genes (pteVam1)
## 117                  Black snub-nosed monkey genes (ASM169854v1)
## 118                                         Rat genes (Rnor_6.0)
## 119                     Golden snub-nosed monkey genes (Rrox_v1)
## 120                                        Shrew genes (sorAra1)
## 121                   Bolivian squirrel monkey genes (SaiBol1.0)
## 122                     Saccharomyces cerevisiae genes (R64-1-1)
## 123                          Yellowtail amberjack genes (Sedor1)
## 124                            Greater amberjack genes (Sdu_1.0)
## 125                         Asian bonytongue genes (ASM162426v1)
## 126                       Tasmanian devil genes (Devil_ref v7.0)
## 127                                   Turbot genes (ASM318616v1)
## 128          Bicolor damselfish genes (Stegastes_partitus-1.0.2)
## 129                                      Pig genes (Sscrofa11.1)
## 130                                   Tree Shrew genes (tupBel1)
## 131                              Zebra Finch genes (taeGut3.2.4)
## 132                              Tetraodon genes (TETRAODON 8.0)
## 133                                           Fugu genes (FUGU5)
## 134                                      Dolphin genes (turTru1)
## 135                                       Alpaca genes (vicPac1)
## 136     Monterrey platyfish genes (Xiphophorus_couchianus-4.0.1)
## 137                       Platyfish genes (X_maculatus-5.0-male)
## 138                                      Xenopus genes (JGI 4.2)
##                          version
## 1                     fAstCal1.2
## 2                      AnoCar2.0
## 3                       Midas_v5
## 4                        ailMel1
## 5         Astyanax_mexicanus-2.0
## 6                       Anan_2.0
## 7                      AmpOce1.0
## 8                        Nemo_v1
## 9                   BGI_duck_1.0
## 10                   ASM210954v1
## 11                    fAnaTes1.1
## 12                        UMD3.1
## 13                      CavAp1.0
## 14                      Caty_1.0
## 15            Cebus_imitator-1.0
## 16                  CHOK1GS_HDv1
## 17                    CriGri_1.0
## 18                      WBcel235
## 19                     CanFam3.1
## 20                          ARS1
## 21                       choHof1
## 22                            KH
## 23                   ASM275486v1
## 24                     ChiLan1.0
## 25                   Cang.pa_1.0
## 26                     Cavpor3.0
## 27                     ChlSab1.1
## 28                      CSAV 2.0
## 29                      Cse_v1.0
## 30        Tarsius_syrichta-2.0.1
## 31              C_variegatus-1.0
## 32                         BDGP6
## 33                     Dasnov3.0
## 34                      Dord_2.0
## 35                        GRCz11
## 36                  Eburgeri_3.2
## 37                     Equ Cab 2
## 38                       eriEur1
## 39                       Eluc_V3
## 40                        TENREC
## 41                    FicAlb_1.4
## 42               Felis_catus_9.0
## 43                      DMR_v1.0
## 44   Fundulus_heteroclitus-3.0.2
## 45                      BROAD S1
## 46                   ASM309773v1
## 47             Gallus_gallus-5.0
## 48                       gorGor4
## 49                       gadMor1
## 50                     AstBur1.0
## 51                H_comes_QL1_v1
## 52             HetGla_female_1.0
## 53                    HetGla_1.0
## 54                    GRCh38.p12
## 55                    IpCoco_1.2
## 56                     SpeTri2.0
## 57                     JacJac1.0
## 58                   ASM164957v1
## 59                     Loxafr3.0
## 60                    BallGen_V1
## 61                       LatCha1
## 62                       LepOcu1
## 63                   M_albus_1.0
## 64                    fMasArm1.1
## 65                     MesAur1.0
## 66               CAROLI_EIJ_v1.1
## 67                       monDom5
## 68       Macaca_fascicularis_5.0
## 69                  MusPutFur1.0
## 70                   Turkey_2.01
## 71                   Mleu.le_1.0
## 72                     Myoluc2.0
## 73                   ASM169857v1
## 74                    Mmul_8.0.1
## 75                      Mmur_3.0
## 76                     GRCm38.p6
## 77                      Mnem_1.0
## 78                     MicOch1.0
## 79               PAHARI_EIJ_v1.1
## 80                  SPRET_EiJ_v1
## 81                 M_zebra_UMD2a
## 82                     NeoBri1.0
## 83                      Meug_1.0
## 84                 S.galili_v1.0
## 85                      Nleu_3.0
## 86                         OANA5
## 87                      Oar_v3.1
## 88                     OryCun2.0
## 89                     OctDeg1.0
## 90                       OtoGar3
## 91                   ASM223471v1
## 92                   ASM223469v1
## 93                   ASM223467v1
## 94                  Om_v0.7.RACA
## 95                     Orenil1.0
## 96                 OchPri2.0-Ens
## 97                         PPYG2
## 98                     PanTig1.0
## 99                      Panu_3.0
## 100                     Pman_1.0
## 101                      proCap1
## 102                     Pcoq_1.0
## 103       Poecilia_formosa-5.1.2
## 104                   PKINGS_0.1
## 105              P_latipinna-1.0
## 106                        PM.fa
## 107                 Pmarinus_7.0
## 108               P_mexicana-1.0
## 109  Pygocentrus_nattereri-1.0.2
## 110                    PunNye1.0
## 111                    panpan1.1
## 112                    PanPar1.0
## 113          Guppy_female_1.0_MT
## 114                   PelSin_1.0
## 115                  Pan_tro_3.0
## 116                      pteVam1
## 117                  ASM169854v1
## 118                     Rnor_6.0
## 119                      Rrox_v1
## 120                      sorAra1
## 121                    SaiBol1.0
## 122                      R64-1-1
## 123                       Sedor1
## 124                      Sdu_1.0
## 125                  ASM162426v1
## 126               Devil_ref v7.0
## 127                  ASM318616v1
## 128     Stegastes_partitus-1.0.2
## 129                  Sscrofa11.1
## 130                      tupBel1
## 131                  taeGut3.2.4
## 132                TETRAODON 8.0
## 133                        FUGU5
## 134                      turTru1
## 135                      vicPac1
## 136 Xiphophorus_couchianus-4.0.1
## 137         X_maculatus-5.0-male
## 138                      JGI 4.2
CellType=read.csv('./InputMsAge/CellType_GSE52564_FCover20_Signature.csv', header=T, sep=',')
CellType=CellType[!duplicated(CellType$Gene_mouse),]
rownames(CellType)=CellType$Gene_mouse
mouse = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'mmusculus_gene_ensembl')
Human = useMart('ENSEMBL_MART_ENSEMBL', dataset = 'hsapiens_gene_ensembl')
myMap = getLDS(attributes = "mgi_symbol", filters = 'mgi_symbol',
    values = CellType$Gene_mouse, mart = mouse, 
    attributesL = c("hgnc_symbol"), martL = Human)
#Some genes will be duplicates (HLA), so let's make them unique.
myMap=myMap[!duplicated(myMap$HGNC.symbol), ]
#unique(myMap$HGNC.symbol)
myMap_new<-myMap
names(myMap_new) <- c("Gene_mouse", "HuGene")
DT::datatable(head(myMap_new, n=10))
CellType_Hu=merge(CellType,myMap_new,by='Gene_mouse')
colnames(CellType_Hu)[colnames(CellType_Hu)=="Gene_mouse"] <- "MsGene"
CellType_Hu=CellType_Hu[c("HuGene","CellType_Vs_Others","MsGene")]
CellType_Hu=CellType_Hu[!duplicated(CellType_Hu$HuGene), ]
rownames(CellType_Hu)=CellType_Hu$HuGene
CellType_Hu=CellType_Hu[,-1]
write.csv(CellType_Hu,"CellType_GSE52564_FCover20_Signature_Hu.csv")
celltype=read.csv("CellType_GSE52564_FCover20_Signature_Hu.csv",sep=',')
cellTypeGeneCount=as.data.frame(table(celltype$CellType_Vs_Others))
cellTypeGeneCount
##                                Var1 Freq
## 1    AstroByothersCuffdiff2FCover20  451
## 2  EndotheByothersCuffdiff2FCover20  626
## 3 MicrogliByothersCuffdiff2FCover20 1147
## 4 MyeOligoByothersCuffdiff2FCover20  263
## 5   NeuronByothersCuffdiff2FCover20  706
## 6 NewOligoByothersCuffdiff2FCover20   82
## 7 PreOligoByothersCuffdiff2FCover20  116
write.csv(cellTypeGeneCount,"CellType_GSE52564_FCover20_Signature_Hu_GeneCount.csv")

Step21: Enrichment test for comparison with Cuffdiff2 defined cell type signature genes from microglia, neuron, astrocyte, (precursor, new, mature)oligodendrocyte, endothelial cells

#This is the actual calulation of hypergeometric enrichment
enrichmentsCellType = userListEnrichment(rownames(datExprA1g2), modulesA1,"CellType_GSE52564_FCover20_Signature_Hu.csv", "", "enrichmentsCellType_Sigificantcolors.csv")
## 119 comparisons were successfully performed.

Saving results

#List of genes that overlap between modules and cell type
enrichmentsCellType_OvGenes=enrichmentsCellType[[2]]
dir.create("./enrichmentsCellType_OvGenes")
for (i in 1:length(enrichmentsCellType_OvGenes)) {
  write.csv(enrichmentsCellType_OvGenes[i], file=paste0("enrichmentsCellType_OvGenes/", names(enrichmentsCellType_OvGenes)[i], ".txt"))
}

#List of genes that overlap between modules and cell type
enrichmentsCellType_NumOvGenes_pvalue=enrichmentsCellType[[1]]
write.csv(enrichmentsCellType_NumOvGenes_pvalue,"enrichments_NumOvGenes_pvalue_colors.csv")
geneListsModuleSummary=read.csv('geneListsModuleSummarycolors.csv', sep=',')
enrichmentsSummary=read.csv('enrichments_NumOvGenes_pvalue_colors.csv', sep=',')
geneListsModuleSummaryMod=table(geneListsModuleSummary$modulesA1)
geneListsModuleSummaryMod=as.data.frame(geneListsModuleSummaryMod)
write.csv(geneListsModuleSummaryMod, "geneListsModuleSummarycolors_GeneCount.csv")

colnames(geneListsModuleSummaryMod)=c("InputCategories", "Freq")
geneListsModuleSummaryMod
##    InputCategories Freq
## 1            black  101
## 2             blue  237
## 3            brown  149
## 4             cyan   39
## 5            green  111
## 6      greenyellow   64
## 7             grey 5665
## 8           grey60   24
## 9        lightcyan   32
## 10         magenta   84
## 11    midnightblue   35
## 12            pink   84
## 13          purple   77
## 14             red  101
## 15          salmon   41
## 16             tan   51
## 17       turquoise  259
## 18          yellow  147
enrichmentsSummarymergeMod=merge(enrichmentsSummary,geneListsModuleSummaryMod, by="InputCategories")
write.csv(enrichmentsSummarymergeMod,"enrichments_SummarymergeMod_colors.csv")

Same as above for labeled modules

enrichmentsCellTypeL = userListEnrichment(rownames(datExprA1g2), modulesA1L,"CellType_GSE52564_FCover20_Signature_Hu.csv", "", "enrichmentsCellType_Sigificantlabels.csv")
## 126 comparisons were successfully performed.
enrichmentsCellType_OvGenesL=enrichmentsCellTypeL[[2]]
for (i in 1:length(enrichmentsCellType_OvGenesL)) {
  write.csv(enrichmentsCellType_OvGenesL[i], file=paste0("enrichmentsCellType_OvGenes/", names(enrichmentsCellType_OvGenesL)[i], ".txt"))
}

enrichmentsCellType_NumOvGenes_pvalueL=enrichmentsCellTypeL[[1]]
write.csv(enrichmentsCellType_NumOvGenes_pvalueL,"enrichments_NumOvGenes_pvalue_labels.csv")
geneListsModuleSummaryL=read.csv('geneListsModuleSummarylabels.csv', sep=',')
enrichmentsSummaryL=read.csv('enrichments_NumOvGenes_pvalue_labels.csv', sep=',')
geneListsModuleSummaryModL=table(geneListsModuleSummaryL$modulesA1L)
geneListsModuleSummaryModL=as.data.frame(geneListsModuleSummaryModL)
write.csv(geneListsModuleSummaryModL, "geneListsModuleSummarylabels_GeneCount.csv")

colnames(geneListsModuleSummaryModL)=c("InputCategories", "Freq")
enrichmentsSummarymergeModL=merge(enrichmentsSummaryL,geneListsModuleSummaryModL, by="InputCategories")
write.csv(enrichmentsSummarymergeModL,"enrichments_SummarymergeMod_labels.csv")

Step 22: Cross-pipeline preservation and hypergeometric test: Qualitatively and quantitatively measure cross-species, cross-pipeline and/or any cross-dataset network preservation at the module level

Will export the files for preservation analysis and hypergeometric test

#Export expression file
write.csv(datExprA1g2,"ForPres&Hyper_MsAge7K_datExprA1g2.csv")
#Export the module and gene names for use Preservation and hypergeometric test 
write.csv(kMEtable1[,c("PSID","Gene","Module")],"ForPres&Hyper_MsAge7K_GeneModule.csv")
write.csv(kMEtable1L[,c("PSID","Gene","Module")],"ForPres&Hyper_MsAge7K_GeneModuleL.csv")

below select yellow module which has microglia genes

Step23: Annotation of module enriched with microglia signature genes

Select enrichR databases

dbs <- listEnrichrDbs()
dbs
##                                           libraryName numTerms
## 1                                 Genome_Browser_PWMs      615
## 2                            TRANSFAC_and_JASPAR_PWMs      326
## 3                           Transcription_Factor_PPIs      290
## 4                                           ChEA_2013      353
## 5                    Drug_Perturbations_from_GEO_2014      701
## 6                             ENCODE_TF_ChIP-seq_2014      498
## 7                                       BioCarta_2013      249
## 8                                       Reactome_2013       78
## 9                                   WikiPathways_2013      199
## 10                Disease_Signatures_from_GEO_up_2014      142
## 11                                          KEGG_2013      200
## 12                         TF-LOF_Expression_from_GEO      269
## 13                                TargetScan_microRNA      222
## 14                                   PPI_Hub_Proteins      385
## 15                         GO_Molecular_Function_2015     1136
## 16                                          GeneSigDB     2139
## 17                                Chromosome_Location      386
## 18                                   Human_Gene_Atlas       84
## 19                                   Mouse_Gene_Atlas       96
## 20                         GO_Cellular_Component_2015      641
## 21                         GO_Biological_Process_2015     5192
## 22                           Human_Phenotype_Ontology     1779
## 23                    Epigenomics_Roadmap_HM_ChIP-seq      383
## 24                                           KEA_2013      474
## 25                  NURSA_Human_Endogenous_Complexome     1796
## 26                                              CORUM     1658
## 27                            SILAC_Phosphoproteomics       84
## 28                    MGI_Mammalian_Phenotype_Level_3       71
## 29                    MGI_Mammalian_Phenotype_Level_4      476
## 30                                        Old_CMAP_up     6100
## 31                                      Old_CMAP_down     6100
## 32                                       OMIM_Disease       90
## 33                                      OMIM_Expanded      187
## 34                                          VirusMINT       85
## 35                               MSigDB_Computational      858
## 36                        MSigDB_Oncogenic_Signatures      189
## 37              Disease_Signatures_from_GEO_down_2014      142
## 38                    Virus_Perturbations_from_GEO_up      323
## 39                  Virus_Perturbations_from_GEO_down      323
## 40                      Cancer_Cell_Line_Encyclopedia      967
## 41                           NCI-60_Cancer_Cell_Lines       93
## 42        Tissue_Protein_Expression_from_ProteomicsDB      207
## 43  Tissue_Protein_Expression_from_Human_Proteome_Map       30
## 44                                   HMDB_Metabolites     3906
## 45                              Pfam_InterPro_Domains      311
## 46                         GO_Biological_Process_2013      941
## 47                         GO_Cellular_Component_2013      205
## 48                         GO_Molecular_Function_2013      402
## 49                               Allen_Brain_Atlas_up     2192
## 50                            ENCODE_TF_ChIP-seq_2015      816
## 51                  ENCODE_Histone_Modifications_2015      412
## 52                  Phosphatase_Substrates_from_DEPOD       59
## 53                             Allen_Brain_Atlas_down     2192
## 54                  ENCODE_Histone_Modifications_2013      109
## 55                          Achilles_fitness_increase      216
## 56                          Achilles_fitness_decrease      216
## 57                       MGI_Mammalian_Phenotype_2013      476
## 58                                      BioCarta_2015      239
## 59                                      HumanCyc_2015      125
## 60                                          KEGG_2015      179
## 61                                    NCI-Nature_2015      209
## 62                                       Panther_2015      104
## 63                                  WikiPathways_2015      404
## 64                                      Reactome_2015     1389
## 65                                             ESCAPE      315
## 66                                         HomoloGene       12
## 67                Disease_Perturbations_from_GEO_down      839
## 68                  Disease_Perturbations_from_GEO_up      839
## 69                   Drug_Perturbations_from_GEO_down      906
## 70                   Genes_Associated_with_NIH_Grants    32876
## 71                     Drug_Perturbations_from_GEO_up      906
## 72                                           KEA_2015      428
## 73              Single_Gene_Perturbations_from_GEO_up     2460
## 74            Single_Gene_Perturbations_from_GEO_down     2460
## 75                                          ChEA_2015      395
## 76                                              dbGaP      345
## 77                           LINCS_L1000_Chem_Pert_up    33132
## 78                         LINCS_L1000_Chem_Pert_down    33132
## 79   GTEx_Tissue_Sample_Gene_Expression_Profiles_down     2918
## 80     GTEx_Tissue_Sample_Gene_Expression_Profiles_up     2918
## 81                 Ligand_Perturbations_from_GEO_down      261
## 82                  Aging_Perturbations_from_GEO_down      286
## 83                    Aging_Perturbations_from_GEO_up      286
## 84                   Ligand_Perturbations_from_GEO_up      261
## 85                   MCF7_Perturbations_from_GEO_down      401
## 86                     MCF7_Perturbations_from_GEO_up      401
## 87                Microbe_Perturbations_from_GEO_down      312
## 88                  Microbe_Perturbations_from_GEO_up      312
## 89              LINCS_L1000_Ligand_Perturbations_down       96
## 90                LINCS_L1000_Ligand_Perturbations_up       96
## 91              LINCS_L1000_Kinase_Perturbations_down     3644
## 92                LINCS_L1000_Kinase_Perturbations_up     3644
## 93                                      Reactome_2016     1530
## 94                                          KEGG_2016      293
## 95                                  WikiPathways_2016      437
## 96          ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X      104
## 97                 Kinase_Perturbations_from_GEO_down      285
## 98                   Kinase_Perturbations_from_GEO_up      285
## 99                                      BioCarta_2016      237
## 100                                     HumanCyc_2016      152
## 101                                   NCI-Nature_2016      209
## 102                                      Panther_2016      112
## 103                                        DrugMatrix     7876
## 104                                         ChEA_2016      645
## 105                                             huMAP      995
## 106                                    Jensen_TISSUES     1842
## 107 RNA-Seq_Disease_Gene_and_Drug_Signatures_from_GEO     1302
## 108                      MGI_Mammalian_Phenotype_2017     5231
## 109                               Jensen_COMPARTMENTS     2283
## 110                                   Jensen_DISEASES     1811
## 111                                      BioPlex_2017     3915
## 112                        GO_Cellular_Component_2017      636
## 113                        GO_Molecular_Function_2017      972
## 114                        GO_Biological_Process_2017     3166
## 115                       GO_Cellular_Component_2017b      816
## 116                       GO_Molecular_Function_2017b     3271
## 117                       GO_Biological_Process_2017b    10125
## 118                                    ARCHS4_Tissues      108
## 119                                 ARCHS4_Cell-lines      125
## 120                                  ARCHS4_IDG_Coexp      352
## 121                              ARCHS4_Kinases_Coexp      498
## 122                                  ARCHS4_TFs_Coexp     1724
## 123                           SysMyo_Muscle_Gene_Sets     1135
## 124                                   miRTarBase_2017     3240
## 125                          TargetScan_microRNA_2017      683
## 126              Enrichr_Libraries_Most_Popular_Genes      121
## 127           Enrichr_Submissions_TF-Gene_Coocurrence     1722
## 128        Data_Acquisition_Method_Most_Popular_Genes       12
## 129                                            DSigDB     4026
## 130                        GO_Biological_Process_2018     5103
## 131                        GO_Cellular_Component_2018      446
## 132                        GO_Molecular_Function_2018     1151
## 133           TF_Perturbations_Followed_by_Expression     1958
##     geneCoverage genesPerTerm
## 1          13362          275
## 2          27884         1284
## 3           6002           77
## 4          47172         1370
## 5          47107          509
## 6          21493         3713
## 7           1295           18
## 8           3185           73
## 9           2854           34
## 10         15057          300
## 11          4128           48
## 12         34061          641
## 13          7504          155
## 14         16399          247
## 15         12753           57
## 16         23726          127
## 17         32740           85
## 18         13373          258
## 19         19270          388
## 20         13236           82
## 21         14264           58
## 22          3096           31
## 23         22288         4368
## 24          4533           37
## 25         10231          158
## 26          2741            5
## 27          5655          342
## 28         10406          715
## 29         10493          200
## 30         11251          100
## 31          8695          100
## 32          1759           25
## 33          2178           89
## 34           851           15
## 35         10061          106
## 36         11250          166
## 37         15406          300
## 38         17711          300
## 39         17576          300
## 40         15797          176
## 41         12232          343
## 42         13572          301
## 43          6454          301
## 44          3723           47
## 45          7588           35
## 46          7682           78
## 47          7324          172
## 48          8469          122
## 49         13121          305
## 50         26382         1811
## 51         29065         2123
## 52           280            9
## 53         13877          304
## 54         15852          912
## 55          4320          129
## 56          4271          128
## 57         10496          201
## 58          1678           21
## 59           756           12
## 60          3800           48
## 61          2541           39
## 62          1918           39
## 63          5863           51
## 64          6768           47
## 65         25651          807
## 66         19129         1594
## 67         23939          293
## 68         23561          307
## 69         23877          302
## 70         15886            9
## 71         24350          299
## 72          3102           25
## 73         31132          298
## 74         30832          302
## 75         48230         1429
## 76          5613           36
## 77          9559           73
## 78          9448           63
## 79         16725         1443
## 80         19249         1443
## 81         15090          282
## 82         16129          292
## 83         15309          308
## 84         15103          318
## 85         15022          290
## 86         15676          310
## 87         15854          279
## 88         15015          321
## 89          3788          159
## 90          3357          153
## 91         12668          300
## 92         12638          300
## 93          8973           64
## 94          7010           87
## 95          5966           51
## 96         15562          887
## 97         17850          300
## 98         17660          300
## 99          1348           19
## 100          934           13
## 101         2541           39
## 102         2041           42
## 103         5209          300
## 104        49238         1550
## 105         2243           19
## 106        19586          545
## 107        22440          505
## 108         8184           24
## 109        18329          161
## 110        15755           28
## 111        10271           22
## 112        10427           38
## 113        10601           25
## 114        13822           21
## 115         8002          143
## 116        10089           45
## 117        13247           49
## 118        21809         2316
## 119        23601         2395
## 120        20883          299
## 121        19612          299
## 122        25983          299
## 123        19500          137
## 124        14893          128
## 125        17598         1208
## 126         5902          109
## 127        12486          299
## 128         1073          100
## 129        19513          117
## 130        14433           36
## 131         8655           61
## 132        11459           39
## 133        19741          270
##                                                                          link
## 1                    http://hgdownload.cse.ucsc.edu/goldenPath/hg18/database/
## 2                                    http://jaspar.genereg.net/html/DOWNLOAD/
## 3                                                                            
## 4                              http://amp.pharm.mssm.edu/lib/cheadownload.jsp
## 5                                            http://www.ncbi.nlm.nih.gov/geo/
## 6                                http://genome.ucsc.edu/ENCODE/downloads.html
## 7                         https://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 8                                 http://www.reactome.org/download/index.html
## 9                     http://www.wikipathways.org/index.php/Download_Pathways
## 10                                           http://www.ncbi.nlm.nih.gov/geo/
## 11                                          http://www.kegg.jp/kegg/download/
## 12                                           http://www.ncbi.nlm.nih.gov/geo/
## 13  http://www.targetscan.org/cgi-bin/targetscan/data_download.cgi?db=vert_61
## 14                                              http://amp.pharm.mssm.edu/X2K
## 15                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 16                             http://genesigdb.org/genesigdb/downloadall.jsp
## 17                   http://software.broadinstitute.org/gsea/msigdb/index.jsp
## 18                                               http://biogps.org/downloads/
## 19                                               http://biogps.org/downloads/
## 20                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 21                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 22                                   http://www.human-phenotype-ontology.org/
## 23                                         http://www.roadmapepigenomics.org/
## 24                           http://amp.pharm.mssm.edu/lib/keacommandline.jsp
## 25                                      https://www.nursa.org/nursa/index.jsf
## 26                        http://mips.helmholtz-muenchen.de/genre/proj/corum/
## 27                           http://amp.pharm.mssm.edu/lib/keacommandline.jsp
## 28                                            http://www.informatics.jax.org/
## 29                                            http://www.informatics.jax.org/
## 30                                        http://www.broadinstitute.org/cmap/
## 31                                        http://www.broadinstitute.org/cmap/
## 32                                              http://www.omim.org/downloads
## 33                                              http://www.omim.org/downloads
## 34                                  http://mint.bio.uniroma2.it/download.html
## 35                  http://www.broadinstitute.org/gsea/msigdb/collections.jsp
## 36                  http://www.broadinstitute.org/gsea/msigdb/collections.jsp
## 37                                           http://www.ncbi.nlm.nih.gov/geo/
## 38                                           http://www.ncbi.nlm.nih.gov/geo/
## 39                                           http://www.ncbi.nlm.nih.gov/geo/
## 40                             https://portals.broadinstitute.org/ccle/home\n
## 41                                               http://biogps.org/downloads/
## 42                                              https://www.proteomicsdb.org/
## 43                                  http://www.humanproteomemap.org/index.php
## 44                                               http://www.hmdb.ca/downloads
## 45                                ftp://ftp.ebi.ac.uk/pub/databases/interpro/
## 46                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 47                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 48                 http://www.geneontology.org/GO.downloads.annotations.shtml
## 49                                                  http://www.brain-map.org/
## 50                               http://genome.ucsc.edu/ENCODE/downloads.html
## 51                               http://genome.ucsc.edu/ENCODE/downloads.html
## 52                                            http://www.koehn.embl.de/depod/
## 53                                                  http://www.brain-map.org/
## 54                               http://genome.ucsc.edu/ENCODE/downloads.html
## 55                                     http://www.broadinstitute.org/achilles
## 56                                     http://www.broadinstitute.org/achilles
## 57                                            http://www.informatics.jax.org/
## 58                        https://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 59                                                       http://humancyc.org/
## 60                                          http://www.kegg.jp/kegg/download/
## 61                                                    http://pid.nci.nih.gov/
## 62                                                  http://www.pantherdb.org/
## 63                    http://www.wikipathways.org/index.php/Download_Pathways
## 64                                http://www.reactome.org/download/index.html
## 65                                           http://www.maayanlab.net/ESCAPE/
## 66                                     http://www.ncbi.nlm.nih.gov/homologene
## 67                                           http://www.ncbi.nlm.nih.gov/geo/
## 68                                           http://www.ncbi.nlm.nih.gov/geo/
## 69                                           http://www.ncbi.nlm.nih.gov/geo/
## 70                                    https://grants.nih.gov/grants/oer.htm\n
## 71                                           http://www.ncbi.nlm.nih.gov/geo/
## 72                                          http://amp.pharm.mssm.edu/Enrichr
## 73                                           http://www.ncbi.nlm.nih.gov/geo/
## 74                                           http://www.ncbi.nlm.nih.gov/geo/
## 75                                          http://amp.pharm.mssm.edu/Enrichr
## 76                                            http://www.ncbi.nlm.nih.gov/gap
## 77                                                           https://clue.io/
## 78                                                           https://clue.io/
## 79                                                 http://www.gtexportal.org/
## 80                                                 http://www.gtexportal.org/
## 81                                           http://www.ncbi.nlm.nih.gov/geo/
## 82                                           http://www.ncbi.nlm.nih.gov/geo/
## 83                                           http://www.ncbi.nlm.nih.gov/geo/
## 84                                           http://www.ncbi.nlm.nih.gov/geo/
## 85                                           http://www.ncbi.nlm.nih.gov/geo/
## 86                                           http://www.ncbi.nlm.nih.gov/geo/
## 87                                           http://www.ncbi.nlm.nih.gov/geo/
## 88                                           http://www.ncbi.nlm.nih.gov/geo/
## 89                                                           https://clue.io/
## 90                                                           https://clue.io/
## 91                                                           https://clue.io/
## 92                                                           https://clue.io/
## 93                                http://www.reactome.org/download/index.html
## 94                                          http://www.kegg.jp/kegg/download/
## 95                    http://www.wikipathways.org/index.php/Download_Pathways
## 96                                                                           
## 97                                           http://www.ncbi.nlm.nih.gov/geo/
## 98                                           http://www.ncbi.nlm.nih.gov/geo/
## 99                         http://cgap.nci.nih.gov/Pathways/BioCarta_Pathways
## 100                                                      http://humancyc.org/
## 101                                                   http://pid.nci.nih.gov/
## 102                                         http://www.pantherdb.org/pathway/
## 103                                     https://ntp.niehs.nih.gov/drugmatrix/
## 104                                         http://amp.pharm.mssm.edu/Enrichr
## 105                                              http://proteincomplexes.org/
## 106                                             http://tissues.jensenlab.org/
## 107                                          http://www.ncbi.nlm.nih.gov/geo/
## 108                                           http://www.informatics.jax.org/
## 109                                        http://compartments.jensenlab.org/
## 110                                            http://diseases.jensenlab.org/
## 111                                           http://bioplex.hms.harvard.edu/
## 112                                              http://www.geneontology.org/
## 113                                              http://www.geneontology.org/
## 114                                              http://www.geneontology.org/
## 115                                              http://www.geneontology.org/
## 116                                              http://www.geneontology.org/
## 117                                              http://www.geneontology.org/
## 118                                          http://amp.pharm.mssm.edu/archs4
## 119                                          http://amp.pharm.mssm.edu/archs4
## 120                                          http://amp.pharm.mssm.edu/archs4
## 121                                          http://amp.pharm.mssm.edu/archs4
## 122                                          http://amp.pharm.mssm.edu/archs4
## 123                                               http://sys-myo.rhcloud.com/
## 124                                        http://mirtarbase.mbc.nctu.edu.tw/
## 125                                                http://www.targetscan.org/
## 126                                         http://amp.pharm.mssm.edu/Enrichr
## 127                                         http://amp.pharm.mssm.edu/Enrichr
## 128                                         http://amp.pharm.mssm.edu/Enrichr
## 129                             http://tanlab.ucdenver.edu/DSigDB/DSigDBv1.0/
## 130                                              http://www.geneontology.org/
## 131                                              http://www.geneontology.org/
## 132                                              http://www.geneontology.org/
## 133                                          http://www.ncbi.nlm.nih.gov/geo/
#select pathway databases of choice from above
dbs_select<-c("GO_Biological_Process_2018", "KEGG_2016", "Reactome_2016")

Select color module of interest

SelectGeneListA1=read.csv('geneListsModuleSummarycolors.csv',header=T, sep=',')
SelectGeneListA1=SelectGeneListA1$Gene[modulesA1=='yellow']
#compare for enrichment with our Genes
enriched <- enrichr(SelectGeneListA1, dbs_select)
## Uploading data to Enrichr... Done.
##   Querying GO_Biological_Process_2018... Done.
##   Querying KEGG_2016... Done.
##   Querying Reactome_2016... Done.
## Parsing results... Done.
#GO Biological Process results
SelectGeneListA1_GOBiologicalProcess<-as.data.frame(enriched[["GO_Biological_Process_2018"]])
write.csv(SelectGeneListA1_GOBiologicalProcess,file="SelectGeneListA1_GOBiologicalProcess_yellow.csv")
GOBiologicalProcessData<-SelectGeneListA1_GOBiologicalProcess[order(-SelectGeneListA1_GOBiologicalProcess$Combined.Score),]
GOBiologicalProcessData<-GOBiologicalProcessData[1:10,]
ggplot(data=GOBiologicalProcessData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="stack", stat="identity")+
  labs(title="GOBiologicalProcess Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
  labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="yellow")

dev.print(pdf,"SelectGeneListA1_GOBiologicalProcessTop10plot_yellow.pdf", width=15, height=5)
## quartz_off_screen 
##                 2
#KEEG pathway results
SelectGeneListA1_KEEGpathway<-as.data.frame(enriched[["KEGG_2016"]])
write.csv(SelectGeneListA1_KEEGpathway,file="SelectGeneListA1_KEEGpathway_yellow.csv")
KEEGpathwayData<-SelectGeneListA1_KEEGpathway[order(-SelectGeneListA1_KEEGpathway$Combined.Score),]
KEEGpathwayData<-KEEGpathwayData[1:10,]
ggplot(data=KEEGpathwayData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="stack", stat="identity")+
  labs(title="KEEGpathway Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
  labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="yellow")

dev.print(pdf,"SelectGeneListA1_KEEGpathwayTop10plot_yellow.pdf", width=10, height=5)
## quartz_off_screen 
##                 2
#Reactome pathway results
SelectGeneListA1_Reactome<-as.data.frame(enriched[["Reactome_2016"]])
write.csv(SelectGeneListA1_Reactome,"SelectGeneListA1_Reactome_yellow.csv")
ReactomeData<-SelectGeneListA1_Reactome[order(-SelectGeneListA1_Reactome$Combined.Score),]
ReactomeData<-ReactomeData[1:10,]
ggplot(data=ReactomeData, aes(x=Term, y=-log10(Adjusted.P.value), fill=Combined.Score))+
  geom_bar(position="identity", stat="identity")+
  labs(title="Reactome Comparison")+
  #theme(axis.text.x=element_text(angle=90,hjust=1)) +
  coord_flip()+
  labs(y="-log10(Adjusted.P.value)")+
  scale_fill_gradient(low="grey",high="yellow")

dev.print(pdf,"SelectGeneListA1_ReactomeTop10plot_yellow.pdf", width=10, height=5)
## quartz_off_screen 
##                 2

hub genes from yellow module which is aging and disease associated ahd has microglia genes

Step24: Aging association and plot of ‘microglia’ genes enriched in ‘yellow’ module in aging data A1

#Microglia genes in yellow module are stored in a variable for plotting. This can be replace with any choice of genes present in a given module.  
yellow_Microglia_hubgenes=c("C4B","C4A","CTSS","LGALS3BP","APOD","C1QC","TREM2","C1QB","C1QA","MPEG1")
summary(yellow_Microglia_hubgenes)
##    Length     Class      Mode 
##        10 character character
#view metadata and expression data A1
DT::datatable(metadataA1g_coded)
DT::datatable(datExprA1g2)
#storing variable
Age=metadataA1g_coded$Age
head(Age)
## [1]  3  3  3  3  3 24
#corelation test with variable on expression and modules
var = Age
datAge = t(apply(datExprA1g2,1,cor.test.l))

var = Age
datAgeM = t(apply(t(ME_1A),1,cor.test.l))

#Find the Age-related expression in data A1
colnames(datAge)=c("CorrAge","PvalAge")
dim(datAge[datAge[,2]<0.01,])
## [1] 318   2
DT::datatable(datAge[datAge[,2]<0.01,])
write.csv(datAge[datAge[,2]<0.01,],"A1_datAge_sig_trait_table_form.csv")

#Find the Age-related modules in data A1
#this is visually represented in the trait plot 'A1_relating modules to trait.pdf'
colnames(datAgeM)=c("CorrAge","PvalAge")
datAgeM[datAgeM[,2]<0.01,]
##             CorrAge      PvalAge
## MEgrey    0.3899990 7.736679e-04
## MEgrey60  0.3073683 9.123049e-03
## MEmagenta 0.3898617 7.772807e-04
## MEyellow  0.6279057 4.600830e-09
write.csv(datAgeM[datAgeM[,2]<0.01,],"A1_datAgeM_sig_trait_table_form.csv")
#Export the correlation with Age and p-value 
datAge_yellow_Microglia_hubgenes=datAge[yellow_Microglia_hubgenes,]
DT::datatable(datAge_yellow_Microglia_hubgenes)
write.csv(datAge_yellow_Microglia_hubgenes,"A1_datAge_yellow_Microglia_hubgenes.csv")
#Scatterplot of genes
#If get error in kniting the Rmd then uncomment line below
#par(mar=c(1,1,1,1))
pdf(file="A1_Age_yellow_Microglia_hubgenes_Scatterplot.pdf", width=10, height=10)
par(mfrow=c(2,2))
for (i in yellow_Microglia_hubgenes[1:length(yellow_Microglia_hubgenes)]) {
  verboseScatterplot(Age, as.numeric(datExprA1g2[i,]),main=i, las=2, abline=TRUE, xlab="Age", ylab="")
}
dev.off()
## quartz_off_screen 
##                 2
#Scatterplot of module
verboseScatterplot(Age, as.numeric(ME_1A[,"MEyellow"]), main="yellow ME expr. -", las=2, abline=TRUE, xlab="Age", ylab="")

dev.print(pdf,"A1_Age_yellow_Scatterplot.pdf", width=5, height=5)
## quartz_off_screen 
##                 2

Step25: Export network file for VisaNT and Cytoscape

Ref1: https://scholarscompass.vcu.edu/cgi/viewcontent.cgi?article=5500&context=etd

Ref2: https://www.stat.wisc.edu/~yandell/statgen/ucla/WGCNA/wgcna.html

#Export using already generated TOM and power
TOM = TOM

#Select module
sel_module= "yellow"

#We have many diffent modules this function only keeps those modules that match with our selected module name  
inModule = is.finite(colorsA1==sel_module)
#Select only those genes that are in the selected module. In keeping with microarrya nomenclature modProbes is the default name of the genes in modules    for the WGCNA package. 
probes = rownames(datExprA1g2)
modProbes = probes[inModule]
#modProbes = substring(modProbes,1,25)
length(modProbes)
## [1] 7301
#this number should match with the number of genes in the selected module name
  
#Select the corresponding Topological Overlap for the selected module i.e. only those genes are selected that belong to the selected module
modTOM = TOM[inModule, inModule];
dimnames(modTOM) = list(modProbes, modProbes)

#Export the network into files for VisANT for the selected module. 
vis = exportNetworkToVisANT(modTOM,file = paste("VisANTInput-", sel_module, "-yellowUnsortedAllModGenes.txt", sep=""),weighted = TRUE, threshold = 0)
vis[1:3,]
##   from    to direction method    weight
## 1  GH1  CSH1         0  M0039 0.4473446
## 2  GH1   GH2         0  M0039 0.4473446
## 3  GH1 CSHL1         0  M0039 0.4473446
#Connectivity or weights is a.k.a. kIM or kWithin 'kME hub genes' connection with other nodes or genes within module
vis_kMEhubs1=vis[which(vis$from==yellow_Microglia_hubgenes),] 

vis_kMEhubs2=vis_kMEhubs1
#optional select connections from hub genes, start at 0.01 if this gives too many connections to increase by 0.01 by uncomenting line below
#vis_kMEhubs2=vis_kMEhubs1[which(vis_kMEhubs1$weight>0.01),] #

vis_kMEhubs3=vis_kMEhubs2[order(-vis_kMEhubs2$weight),] #order by stronger to weaker connectivity

#export output
write.table(vis_kMEhubs3, paste0("VisANTInput-", sel_module, "--yellowHubGenesOrdered.txt", sep=""),row.names=F, col.names=F, quote=F)

Cytoscape Export

#Export using already generated TOM and power
TOM = TOM

#Select module
sel_module= "yellow"
  
#We have many diffent moduledynalicLabels this function only keeps those moduledynamicLabels that match with our selected module name  
inModule = is.finite(colorsA1==sel_module)
#Select only those genes that are in the selected module
genes=rownames(datExprA1g2)
modGenes=genes[inModule]
length(modGenes)
## [1] 7301
#this number should match with the number of genes in the selected module name
  
#Select the corresponding Topological Overlap for the selected module i.e. only those genes are selected that belong to the selected module
modTOM = TOM[inModule, inModule]
dimnames(modTOM) = list(modGenes, modGenes)
  
  
#Export the network into edge and node list files for Cytoscape for the selected module and use as input for cytoscape
#threshold start at 0.02 if this gives too many connections increase by 0.01 
cyt = exportNetworkToCytoscape(modTOM,
                               edgeFile=paste("CytoEdge",paste(sel_module,collapse="-"),"yellow.txt",sep=""),
                               nodeFile=paste("CytoNode",paste(sel_module,collapse="-"),"yellow.txt",sep=""),
                               weighted = TRUE, threshold = 0.02,nodeNames=modGenes,
                               altNodeNames = modGenes, nodeAttr = colorsA1[sel_module])

Step26: Summary of kME kWithin and number of connections for yellow module hub genes

vis_kMEhubs4=as.data.frame(table(vis_kMEhubs3$from))
colnames(vis_kMEhubs4)=c("Gene","NumberOfConnections")
rownames(vis_kMEhubs4)=vis_kMEhubs4$Gene
#Create a summary file of kME, kWithin and intramodules connections 
merge1=merge(vis_kMEhubs4,IntraModularConnectivity1L, by="row.names")
merge2=merge(merge1, kMEtable1, by="Gene")
#drop extra row.names column
merge3=merge2[,-c(2)]
write.csv(merge3, "kWithinConnectionskME_yellowHubs.csv")
sessionInfo()
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] enrichR_1.0           WGCNA_1.66            fastcluster_1.1.25   
##  [4] flashClust_1.01-2     qvalue_2.12.0         dynamicTreeCut_1.63-1
##  [7] impute_1.54.0         biomaRt_2.36.1        stringi_1.2.4        
## [10] Rcpp_1.0.0            corrplot_0.84         viridis_0.5.1        
## [13] viridisLite_0.3.0     Hmisc_4.1-1           Formula_1.2-3        
## [16] lattice_0.20-38       RColorBrewer_1.1-2    gplots_3.0.1         
## [19] ggmap_2.6.1           pamr_1.55             survival_2.43-1      
## [22] cluster_2.0.7-1       sva_3.28.0            BiocParallel_1.14.2  
## [25] genefilter_1.62.0     mgcv_1.8-25           nlme_3.1-137         
## [28] DT_0.5                edgeR_3.22.5          limma_3.36.5         
## [31] magrittr_1.5          ggplot2_3.1.0         dplyr_0.7.7          
## [34] reshape2_1.4.3        stringr_1.3.1        
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.1.2       plyr_1.8.4            lazyeval_0.2.1       
##   [4] sp_1.3-1              splines_3.5.0         crosstalk_1.0.0      
##   [7] robust_0.4-18         digest_0.6.18         foreach_1.4.4        
##  [10] htmltools_0.3.6       GO.db_3.6.0           gdata_2.18.0         
##  [13] checkmate_1.8.5       memoise_1.1.0         fit.models_0.5-14    
##  [16] doParallel_1.0.14     annotate_1.58.0       matrixStats_0.54.0   
##  [19] prettyunits_1.0.2     jpeg_0.1-8            colorspace_1.3-2     
##  [22] blob_1.1.1            rrcov_1.4-4           crayon_1.3.4         
##  [25] RCurl_1.95-4.11       jsonlite_1.5          bindr_0.1.1          
##  [28] iterators_1.0.10      glue_1.3.0            gtable_0.2.0         
##  [31] BiocGenerics_0.26.0   DEoptimR_1.0-8        maps_3.3.0           
##  [34] scales_1.0.0          mvtnorm_1.0-8         DBI_1.0.0            
##  [37] xtable_1.8-3          progress_1.2.0        htmlTable_1.12       
##  [40] foreign_0.8-71        bit_1.1-14            mapproj_1.2.6        
##  [43] preprocessCore_1.42.0 stats4_3.5.0          htmlwidgets_1.3      
##  [46] httr_1.3.1            geosphere_1.5-7       acepack_1.4.1        
##  [49] pkgconfig_2.0.2       XML_3.98-1.16         nnet_7.3-12          
##  [52] locfit_1.5-9.1        tidyselect_0.2.5      labeling_0.3         
##  [55] rlang_0.3.0.1         later_0.7.5           AnnotationDbi_1.42.1 
##  [58] munsell_0.5.0         tools_3.5.0           RSQLite_2.1.1        
##  [61] evaluate_0.12         yaml_2.2.0            knitr_1.20           
##  [64] bit64_0.9-7           robustbase_0.93-3     caTools_1.17.1.1     
##  [67] purrr_0.2.5           RgoogleMaps_1.4.3     bindrcpp_0.2.2       
##  [70] mime_0.6              compiler_3.5.0        rstudioapi_0.8       
##  [73] curl_3.2              png_0.1-7             tibble_1.4.2         
##  [76] pcaPP_1.9-73          Matrix_1.2-15         pillar_1.3.0         
##  [79] data.table_1.11.8     bitops_1.0-6          httpuv_1.4.5         
##  [82] R6_2.3.0              latticeExtra_0.6-28   promises_1.0.1       
##  [85] KernSmooth_2.23-15    gridExtra_2.3         IRanges_2.14.12      
##  [88] codetools_0.2-15      MASS_7.3-51.1         gtools_3.8.1         
##  [91] assertthat_0.2.0      proto_1.0.0           rprojroot_1.3-2      
##  [94] rjson_0.2.20          withr_2.1.2           S4Vectors_0.18.3     
##  [97] parallel_3.5.0        hms_0.4.2             grid_3.5.0           
## [100] rpart_4.1-13          rmarkdown_1.10        Biobase_2.40.0       
## [103] shiny_1.2.0           base64enc_0.1-3
toLatex(sessionInfo())
## \begin{itemize}\raggedright
##   \item R version 3.5.0 (2018-04-23), \verb|x86_64-apple-darwin15.6.0|
##   \item Locale: \verb|en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8|
##   \item Running under: \verb|macOS High Sierra 10.13.6|
##   \item Matrix products: default
##   \item BLAS: \verb|/Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib|
##   \item LAPACK: \verb|/Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib|
##   \item Base packages: base, datasets, graphics, grDevices,
##     methods, stats, utils
##   \item Other packages: BiocParallel~1.14.2, biomaRt~2.36.1,
##     cluster~2.0.7-1, corrplot~0.84, dplyr~0.7.7, DT~0.5,
##     dynamicTreeCut~1.63-1, edgeR~3.22.5, enrichR~1.0,
##     fastcluster~1.1.25, flashClust~1.01-2, Formula~1.2-3,
##     genefilter~1.62.0, ggmap~2.6.1, ggplot2~3.1.0, gplots~3.0.1,
##     Hmisc~4.1-1, impute~1.54.0, lattice~0.20-38, limma~3.36.5,
##     magrittr~1.5, mgcv~1.8-25, nlme~3.1-137, pamr~1.55,
##     qvalue~2.12.0, RColorBrewer~1.1-2, Rcpp~1.0.0, reshape2~1.4.3,
##     stringi~1.2.4, stringr~1.3.1, survival~2.43-1, sva~3.28.0,
##     viridis~0.5.1, viridisLite~0.3.0, WGCNA~1.66
##   \item Loaded via a namespace (and not attached): acepack~1.4.1,
##     annotate~1.58.0, AnnotationDbi~1.42.1, assertthat~0.2.0,
##     backports~1.1.2, base64enc~0.1-3, bindr~0.1.1, bindrcpp~0.2.2,
##     Biobase~2.40.0, BiocGenerics~0.26.0, bit~1.1-14, bit64~0.9-7,
##     bitops~1.0-6, blob~1.1.1, caTools~1.17.1.1, checkmate~1.8.5,
##     codetools~0.2-15, colorspace~1.3-2, compiler~3.5.0,
##     crayon~1.3.4, crosstalk~1.0.0, curl~3.2, data.table~1.11.8,
##     DBI~1.0.0, DEoptimR~1.0-8, digest~0.6.18, doParallel~1.0.14,
##     evaluate~0.12, fit.models~0.5-14, foreach~1.4.4,
##     foreign~0.8-71, gdata~2.18.0, geosphere~1.5-7, glue~1.3.0,
##     GO.db~3.6.0, grid~3.5.0, gridExtra~2.3, gtable~0.2.0,
##     gtools~3.8.1, hms~0.4.2, htmlTable~1.12, htmltools~0.3.6,
##     htmlwidgets~1.3, httpuv~1.4.5, httr~1.3.1, IRanges~2.14.12,
##     iterators~1.0.10, jpeg~0.1-8, jsonlite~1.5,
##     KernSmooth~2.23-15, knitr~1.20, labeling~0.3, later~0.7.5,
##     latticeExtra~0.6-28, lazyeval~0.2.1, locfit~1.5-9.1,
##     mapproj~1.2.6, maps~3.3.0, MASS~7.3-51.1, Matrix~1.2-15,
##     matrixStats~0.54.0, memoise~1.1.0, mime~0.6, munsell~0.5.0,
##     mvtnorm~1.0-8, nnet~7.3-12, parallel~3.5.0, pcaPP~1.9-73,
##     pillar~1.3.0, pkgconfig~2.0.2, plyr~1.8.4, png~0.1-7,
##     preprocessCore~1.42.0, prettyunits~1.0.2, progress~1.2.0,
##     promises~1.0.1, proto~1.0.0, purrr~0.2.5, R6~2.3.0,
##     RCurl~1.95-4.11, RgoogleMaps~1.4.3, rjson~0.2.20,
##     rlang~0.3.0.1, rmarkdown~1.10, robust~0.4-18,
##     robustbase~0.93-3, rpart~4.1-13, rprojroot~1.3-2, rrcov~1.4-4,
##     RSQLite~2.1.1, rstudioapi~0.8, S4Vectors~0.18.3, scales~1.0.0,
##     shiny~1.2.0, sp~1.3-1, splines~3.5.0, stats4~3.5.0,
##     tibble~1.4.2, tidyselect~0.2.5, tools~3.5.0, withr~2.1.2,
##     XML~3.98-1.16, xtable~1.8-3, yaml~2.2.0
## \end{itemize}
#save image
save.image(file="WGCNA_temp.RData")
#Organize of files
library(filesstrings)

dir.create("Plots")
file.move(list.files(pattern = '*.pdf'), "Plots")

dir.create("Module_Trait&CellType&GO")
file.move(list.files(pattern = 'A1_dat*'), "Module_Trait&CellType&GO")
file.move(list.files(pattern = 'A1_relating*'), "Module_Trait&CellType&GO")
file.move(list.files(pattern = 'SelectGeneListA1*'), "Module_Trait&CellType&GO")
file.move(list.files(pattern = 'geneListsModuleSummary*'), "Module_Trait&CellType&GO")
file.move(list.files(pattern = 'A1_datAge*'), "Module_Trait&CellType&GO")

file.move(list.files(pattern = "enrichments_Num*"), "enrichmentsCellType_OvGenes")
file.move(list.files(pattern = "enrichments_Sum*"), "enrichmentsCellType_OvGenes")
file.move(list.files(pattern = "enrichmentsCellType_Sig*"), "enrichmentsCellType_OvGenes")

file.move(list.files(pattern = "A1_HubGenestop*"), "Module_GeneskMEHubs")
file.move(list.files(pattern = "ForPres&Hyper*"), "Module_GeneskMEHubs")
file.move(list.files(pattern = "kWithin*"), "Module_GeneskMEHubs")
file.move(list.files(pattern = "kMEtable1*"), "Module_GeneskMEHubs")
file.move(list.files(pattern = "kWithinConnectionskME*"), "Module_GeneskMEHubs")

dir.create("SVA_LM_csv")
file.move(list.files(pattern = 'edata*'), "SVA_LM_csv")
file.move(list.files(pattern = 'top*'), "SVA_LM_csv")
file.move(list.files(pattern = 'summary_stats*'), "SVA_LM_csv")

dir.create("VisANTCytoscape")
file.move(list.files(pattern = "VisANTInput*"), "VisANTCytoscape")
file.move(list.files(pattern = "CytoEdge*"), "VisANTCytoscape")
file.move(list.files(pattern = "CytoNode*"), "VisANTCytoscape")

#these input files are intermediate generated files and if not yet saved they are saved separately in a folder for storing original gene id conversions and annotation at the time of analysis
dir.create("Intermediate")
file.move(list.files(pattern = 'merge_GSE61915_GSE73503_GSE83931*'), "Intermediate")
file.move(list.files(pattern = 'GSE61915_GSE73503_GSE83931*'), "Intermediate")
file.move(list.files(pattern = 'MsAgeInterest*'), "Intermediate")
file.move(list.files(pattern = 'MsGenetoHuGene*'), "Intermediate")
file.move(list.files(pattern = 'SRR*'), "Intermediate")
file.move(list.files(pattern='CellType_GSE52564_FCover20_Signature_Hu_GeneCount.csv'), "Intermediate")
file.move(list.files(pattern='CellType_GSE52564_FCover20_Signature_Hu.csv'), "Intermediate")
#Remove .RData and clear environment to free up memory
rm(list=ls())
file.remove("temp.RData")
## [1] TRUE
gc()
##            used  (Mb) gc trigger   (Mb) limit (Mb)  max used   (Mb)
## Ncells  4340546 231.9   13091016  699.2         NA  13091016  699.2
## Vcells 11982255  91.5  607310898 4633.5     102400 747203584 5700.8